diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 85db407db7..2aedc199f1 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -6,21 +6,16 @@ jobs: name: Build ${{ matrix.os }} ${{ matrix.name }} runs-on: ${{ matrix.os }} env: - JOB_CHECK_FORMAT: ${{ matrix.format }} JOB_ARCHITECTURE: ${{ matrix.arch }} JOB_ENABLE_GL: ${{ matrix.gl }} strategy: matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-11.0] + os: [ubuntu-20.04, macos-latest] include: - os: ubuntu-20.04 mainmatrix: true gl: 1 - - os: ubuntu-20.04 - mainmatrix: false - name: Format - format: 1 - os: ubuntu-20.04 mainmatrix: false name: Arm @@ -30,11 +25,17 @@ jobs: name: AArch64 arch: aarch64 steps: - - name: Setup - run: if [[ "${{matrix.format}}" == "1" ]]; then sudo apt install -y clang-format; fi - uses: actions/checkout@v2 - with: - fetch-depth: 0 - name: Build run: ./presubmit.sh - + formatcheck: + name: Check code format + runs-on: ubuntu-20.04 + steps: + - name: Install packages + run: sudo apt install -y clang-format clang-format-9 + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Check code format + run: ./check-format.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 844283aaaa..7b307a119d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -else(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - add_definitions(-DCL_TARGET_OPENCL_VERSION=300) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_2_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1) @@ -29,14 +23,6 @@ if(USE_CL_EXPERIMENTAL) add_definitions(-DCL_EXPERIMENTAL) endif(USE_CL_EXPERIMENTAL) -# Support both VS2008 and VS2012. 
-set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build") -if(MSVC90) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2008") -else(MSVC110) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2012") -endif(MSVC90) - #----------------------------------------------------------- # Default Configurable Test Set #----------------------------------------------------------- @@ -62,58 +48,13 @@ set(CONFORMANCE_SUFFIX "" ) #build driver as a dependency of the conformance tests, or other such CMake customization include(CMakeVendor.txt OPTIONAL) -#----------------------------------------------------------- -# Development options for OpenCL C++ tests -#----------------------------------------------------------- -# Use OpenCL C kernels instead of OpenCL C++ kernels -option(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS "Use OpenCL C kernels in OpenCL C++ tests" OFF) -if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) - set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) -endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) -# Only check if OpenCL C++ kernels compile to SPIR-V -option(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION "Only check if OpenCL C++ kernels compile to SPIR-V" OFF) -if(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) - if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) - message(FATAL_ERROR "Can't use OpenCL C kernels and compile to SPIR-V.") - endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) - set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) -endif(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) -# -if(CLPP_DEVELOPMENT_OPTIONS) - add_definitions(-DCLPP_DEVELOPMENT_OPTIONS) - add_definitions(${CLPP_DEVELOPMENT_OPTIONS}) -endif(CLPP_DEVELOPMENT_OPTIONS) - -# Offline OpenCL C/C++ compiler provided by Khronos is the only supported -# offline compiler. -# -# Path to offline OpenCL C/C++ compiler provided by Khronos. -# See https://github.com/KhronosGroup/SPIR/ (spirv-1.1 branch or newer SPIR-V-ready -# branch should be used). 
-if(KHRONOS_OFFLINE_COMPILER) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DKHRONOS_OFFLINE_COMPILER=${KHRONOS_OFFLINE_COMPILER}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKHRONOS_OFFLINE_COMPILER=${KHRONOS_OFFLINE_COMPILER}") - # Additional OpenCL C/C++ compiler option. - if(KHRONOS_OFFLINE_COMPILER_OPTIONS) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DKHRONOS_OFFLINE_COMPILER_OPTIONS=${KHRONOS_OFFLINE_COMPILER_OPTIONS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKHRONOS_OFFLINE_COMPILER_OPTIONS=${KHRONOS_OFFLINE_COMPILER_OPTIONS}") - endif(KHRONOS_OFFLINE_COMPILER_OPTIONS) -else(KHRONOS_OFFLINE_COMPILER) - message(WARNING "KHRONOS_OFFLINE_COMPILER is not defined!") - message(WARNING "Running CL C++ tests will not be possible.") -endif(KHRONOS_OFFLINE_COMPILER) - -# CL_LIBCLCXX_DIR - path to dir with OpenCL C++ STL (libclcxx) # CL_INCLUDE_DIR - path to dir with OpenCL headers -# CL_LIBCLCXX_DIR - path to dir with OpenCL library -if(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) +if(CL_INCLUDE_DIR AND CL_LIB_DIR) link_directories(${CL_LIB_DIR}) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}") -else(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) +else(CL_INCLUDE_DIR AND CL_LIB_DIR) message(STATUS "OpenCL hasn't been found!") - message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR, -DCL_LIB_DIR and -DCL_LIBCLCXX_DIR") -endif(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) + message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR") +endif(CL_INCLUDE_DIR AND CL_LIB_DIR) # CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR) @@ -151,10 +92,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive 
- add_cxx_flag_if_supported(-Wno-error=absolute-value) # Issue 783 add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785 add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784 - add_cxx_flag_if_supported(-Wno-error=overflow) # Fixed by #699 + add_cxx_flag_if_supported(-Wno-error=implicit-const-int-float-conversion) # Issue #1250 # -msse -mfpmath=sse to force gcc to use sse for float math, # avoiding excess precision problems that cause tests like int2float @@ -175,6 +115,10 @@ endif() if(MSVC) # Don't warn when using standard non-secure functions. add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # Don't warn about using the portable "strdup" function. + add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE) + # Fix std::min and std::max handling with windows.harness. + add_compile_definitions(NOMINMAX) endif() if( WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" ) @@ -197,10 +141,6 @@ if(LINK_PTHREAD) list(APPEND CLConform_LIBRARIES pthread) endif() -if(DEFINED USE_GLES3) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3") -endif() - if(APPLE) find_library(corefoundation CoreFoundation) find_library(iokit IOKit) @@ -214,39 +154,5 @@ include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gl ${CLConform_SOURCE_DIR}/test_common) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -elseif (CMAKE_BUILD_TYPE STREQUAL "debug") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - - add_subdirectory(test_common) add_subdirectory(test_conformance) -add_subdirectory(test_extensions) - -# Support both VS2008 and VS2012. -set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll") -set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/") - -if (WIN32) - set (COPY "echo") - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} "${DLL_FILES}" "${DST_DIR}" - COMMENT "Copying dll files.. 
") -else (WIN32) - set (COPY cp) - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX}) -endif(WIN32) - -set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") - -if(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} ${DLL_FILES} ${DST_DIR} - COMMENT "Copying other files to output folder..." ) -else(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ) -endif(WIN32) - -set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") diff --git a/check-format.sh b/check-format.sh index 7eae2fdc15..7de2bd2c99 100755 --- a/check-format.sh +++ b/check-format.sh @@ -2,12 +2,10 @@ # Arg used to specify non-'origin/master' comparison branch ORIGIN_BRANCH=${1:-"origin/master"} +CLANG_BINARY=${2:-"`which clang-format-9`"} # Run git-clang-format to check for violations -if [ "$TRAVIS" == "true" ]; then - EXTRA_OPTS="--binary `which clang-format-9`" -fi -CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp $EXTRA_OPTS) +CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY) # Check for no-ops grep '^no modified files to format$' <<<"$CLANG_FORMAT_OUTPUT" && exit 0 diff --git a/presubmit.sh b/presubmit.sh index 646a7f00e3..6fc037c8d5 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -4,11 +4,6 @@ set -e export TOP=$(pwd) -if [[ "${JOB_CHECK_FORMAT}" == "1" ]]; then - ./check-format.sh - exit $? -fi - TOOLCHAIN_URL_arm="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/arm-linux-gnueabihf/gcc-linaro-7.5.0-2019.12-x86_64_arm-linux-gnueabihf.tar.xz" TOOLCHAIN_URL_aarch64="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/aarch64-linux-gnu/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz" @@ -60,17 +55,13 @@ cd build cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ .. 
make -# Get libclcxx -cd ${TOP} -git clone https://github.com/KhronosGroup/libclcxx.git - # Build CTS +cd ${TOP} ls -l mkdir build cd build cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \ - -DCL_LIBCLCXX_DIR=${TOP}/libclcxx \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \ diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt index 2d4bc19091..b05053459f 100644 --- a/test_common/CMakeLists.txt +++ b/test_common/CMakeLists.txt @@ -1,6 +1,5 @@ set(HARNESS_SOURCES - harness/threadTesting.cpp harness/typeWrappers.cpp harness/mt19937.cpp harness/conversions.cpp @@ -22,4 +21,3 @@ set(HARNESS_SOURCES ) add_library(harness STATIC ${HARNESS_SOURCES}) - diff --git a/test_common/gl/setup_win32.cpp b/test_common/gl/setup_win32.cpp index b120a36d6f..708e681d80 100644 --- a/test_common/gl/setup_win32.cpp +++ b/test_common/gl/setup_win32.cpp @@ -13,14 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include #include typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp index c54ecdec33..abc065c94c 100644 --- a/test_common/gl/setup_x11.cpp +++ b/test_common/gl/setup_x11.cpp @@ -13,16 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include -#include -#include #include #include @@ -90,10 +85,17 @@ class X11GLEnvironment : public GLEnvironment } for (int i=0; i<(int)num_of_devices; i++) { - if (!is_extension_available(devices[i], "cl_khr_gl_sharing ")) { - log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); - } else { - log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + if (!is_extension_available(devices[i], "cl_khr_gl_sharing")) + { + log_info("Device %d of %d does not support required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); + } + else + { + log_info("Device %d of %d supports required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); found_valid_device = 1; m_devices[m_device_count++] = devices[i]; } diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp index 34f40b4c3b..57a4ddc179 100644 --- a/test_common/gles/helpers.cpp +++ b/test_common/gles/helpers.cpp @@ -22,7 +22,7 @@ {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}} #if defined(__linux__) || defined(GL_ES_VERSION_2_0) -// On linux we dont link to GLU library to avoid comaptibility issues with +// On linux we don't link to GLU library to avoid compatibility issues with // libstdc++ // FIXME: Implement this const GLubyte* gluErrorString (GLenum error) @@ -271,8 +271,6 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, // Read results from the GL texture glBindTexture(get_base_gl_target(glTarget), glTexture); - GLint realWidth, realHeight; - GLint realInternalFormat; GLenum readBackFormat = GL_RGBA; GLenum readBackType = glType; glFramebufferWrapper glFramebuffer; @@ -301,7 +299,7 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GetGLFormatName(readBackFormat), GetGLTypeName(readBackType)); - 
DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + DumpGLBuffer(readBackType, outWidth, outHeight, (void *)outBuffer); #endif diff --git a/test_common/gles/helpers.h b/test_common/gles/helpers.h index 5bd0fdf1f9..207687875c 100644 --- a/test_common/gles/helpers.h +++ b/test_common/gles/helpers.h @@ -30,11 +30,10 @@ #if !defined (__APPLE__) #include -#include "gl_headers.h" #include -#else -#include "gl_headers.h" +#include #endif +#include "gl_headers.h" #include "harness/errorHelpers.h" #include "harness/kernelHelpers.h" diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index 31985aa090..627980458c 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -22,6 +22,8 @@ #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) // or any other POSIX system +#include + #if defined(_WIN32) #include #if defined(_MSC_VER) @@ -241,7 +243,7 @@ pthread_cond_t cond_var; // Condition variable state. How many iterations on the function left to run, // set to CL_INT_MAX to cause worker threads to exit. Note: this value might // go negative. -volatile cl_int gRunCount = 0; +std::atomic gRunCount{ 0 }; // State that only changes when the threadpool is not working. volatile TPFuncPtr gFunc_ptr = NULL; @@ -261,19 +263,20 @@ pthread_cond_t caller_cond_var; // # of threads intended to be running. Running threads will decrement this // as they discover they've run out of work to do. -volatile cl_int gRunning = 0; +std::atomic gRunning{ 0 }; // The total number of threads launched. 
-volatile cl_int gThreadCount = 0; +std::atomic gThreadCount{ 0 }; + #ifdef _WIN32 void ThreadPool_WorkerFunc(void *p) #else void *ThreadPool_WorkerFunc(void *p) #endif { - cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1); - cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1); - // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); + auto &tid = *static_cast *>(p); + cl_uint threadID = tid++; + cl_int item = gRunCount--; while (MAX_COUNT > item) { @@ -282,8 +285,6 @@ void *ThreadPool_WorkerFunc(void *p) // check for more work to do if (0 >= item) { - // log_info("Thread %d has run out of work.\n", threadID); - // No work to do. Attempt to block waiting for work #if defined(_WIN32) EnterCriticalSection(cond_lock); @@ -298,9 +299,7 @@ void *ThreadPool_WorkerFunc(void *p) } #endif // !_WIN32 - cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1); - // log_info("ThreadPool_WorkerFunc: gRunning = %d\n", - // remaining - 1); + cl_int remaining = gRunning--; if (1 == remaining) { // last thread out signal the main thread to wake up #if defined(_WIN32) @@ -350,7 +349,7 @@ void *ThreadPool_WorkerFunc(void *p) #endif // !_WIN32 // try again to get a valid item id - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; if (MAX_COUNT <= item) // exit if we are done { #if defined(_WIN32) @@ -362,8 +361,7 @@ void *ThreadPool_WorkerFunc(void *p) } } - ThreadPool_AtomicAdd(&gRunning, 1); - // log_info("Thread %d has found work.\n", threadID); + gRunning++; #if defined(_WIN32) LeaveCriticalSection(cond_lock); @@ -447,12 +445,12 @@ void *ThreadPool_WorkerFunc(void *p) } // get the next item - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; } exit: log_info("ThreadPool: thread %d exiting.\n", threadID); - ThreadPool_AtomicAdd(&gThreadCount, -1); + gThreadCount--; #if !defined(_WIN32) return NULL; #endif @@ -487,7 +485,7 @@ void ThreadPool_Init(void) { cl_int i; int err; - volatile cl_uint threadID = 0; + std::atomic 
threadID{ 0 }; // Check for manual override of multithreading code. We add this for better // debuggability. @@ -523,7 +521,7 @@ void ThreadPool_Init(void) { // Count the number of bits in ProcessorMask (number of // logical cores) - ULONG mask = ptr->ProcessorMask; + ULONG_PTR mask = ptr->ProcessorMask; while (mask) { ++gThreadCount; @@ -624,7 +622,7 @@ void ThreadPool_Init(void) } #endif // !_WIN32 - gRunning = gThreadCount; + gRunning = gThreadCount.load(); // init threads for (i = 0; i < gThreadCount; i++) { @@ -688,7 +686,6 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, void ThreadPool_Exit(void) { - int err, count; gRunCount = CL_INT_MAX; #if defined(__GNUC__) @@ -702,13 +699,13 @@ void ThreadPool_Exit(void) #endif // spin waiting for threads to die - for (count = 0; 0 != gThreadCount && count < 1000; count++) + for (int count = 0; 0 != gThreadCount && count < 1000; count++) { #if defined(_WIN32) _WakeAllConditionVariable(cond_var); Sleep(1); #else // !_WIN32 - if ((err = pthread_cond_broadcast(&cond_var))) + if (int err = pthread_cond_broadcast(&cond_var)) { log_error("Error %d from pthread_cond_broadcast. Unable to wake up " "work threads. ThreadPool_Exit failed.\n", @@ -722,7 +719,7 @@ void ThreadPool_Exit(void) if (gThreadCount) log_error("Error: Thread pool timed out after 1 second with %d threads " "still active.\n", - gThreadCount); + gThreadCount.load()); else log_info("Thread pool exited in a orderly fashion.\n"); } @@ -738,7 +735,9 @@ void ThreadPool_Exit(void) // all available then it would make more sense to use those features. 
cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) { +#ifndef _WIN32 cl_int newErr; +#endif cl_int err = 0; // Lazily set up our threads #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) @@ -913,7 +912,9 @@ cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) err = jobError; +#ifndef _WIN32 exit: +#endif // exit critical region #if defined(_WIN32) LeaveCriticalSection(gThreadPoolLock); diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h index 653dde05f5..3b00d7c914 100644 --- a/test_common/harness/alloc.h +++ b/test_common/harness/alloc.h @@ -29,7 +29,7 @@ #include "mingw_compat.h" #endif -static void* align_malloc(size_t size, size_t alignment) +inline void* align_malloc(size_t size, size_t alignment) { #if defined(_WIN32) && defined(_MSC_VER) return _aligned_malloc(size, alignment); @@ -53,7 +53,7 @@ static void* align_malloc(size_t size, size_t alignment) #endif } -static void align_free(void* ptr) +inline void align_free(void* ptr) { #if defined(_WIN32) && defined(_MSC_VER) _aligned_free(ptr); diff --git a/test_common/harness/clImageHelper.h b/test_common/harness/clImageHelper.h index 45395fb078..3019ff341b 100644 --- a/test_common/harness/clImageHelper.h +++ b/test_common/harness/clImageHelper.h @@ -37,6 +37,11 @@ static inline cl_mem create_image_2d(cl_context context, cl_mem_flags flags, { cl_mem mImage = NULL; + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + { + host_ptr = NULL; + } + #ifdef CL_VERSION_1_2 cl_image_desc image_desc_dest; image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D; @@ -119,6 +124,11 @@ static inline cl_mem create_image_3d(cl_context context, cl_mem_flags flags, { cl_mem mImage; + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + { + host_ptr = NULL; + } + #ifdef CL_VERSION_1_2 cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; @@ -166,6 +176,11 @@ create_image_2d_array(cl_context context, cl_mem_flags flags, { cl_mem mImage; + 
if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + { + host_ptr = NULL; + } + cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; image_desc.image_width = image_width; @@ -196,6 +211,11 @@ static inline cl_mem create_image_1d_array( { cl_mem mImage; + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + { + host_ptr = NULL; + } + cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; image_desc.image_width = image_width; @@ -227,6 +247,11 @@ static inline cl_mem create_image_1d(cl_context context, cl_mem_flags flags, { cl_mem mImage; + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + { + host_ptr = NULL; + } + cl_image_desc image_desc; image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER : CL_MEM_OBJECT_IMAGE1D; diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 7aad15a09b..4053b7ee72 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -18,13 +18,13 @@ #if defined(_WIN32) && defined(_MSC_VER) #include -#endif - +#else #ifdef __cplusplus #define EXTERN_C extern "C" #else #define EXTERN_C #endif +#endif // @@ -309,13 +309,6 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #endif -#ifndef MIN -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif -#ifndef MAX -#define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) -#endif - /*----------------------------------------------------------------------------- WARNING: DO NOT USE THESE MACROS: diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index fc3317c7d7..c773126930 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -181,8 +181,8 @@ static ULong sUpperLimits[kNumExplicitTypes] = { 0xffffffffLL, 0xffffffffLL, 0x7fffffffffffffffLL, - 0xffffffffffffffffLL, - 0xffffffffffffffffLL, + 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0, 0 }; // Last two values aren't stored here diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp index 12611873d9..97ab8c8553 100644 --- a/test_common/harness/deviceInfo.cpp +++ b/test_common/harness/deviceInfo.cpp @@ -63,6 +63,40 @@ int is_extension_available(cl_device_id device, const char *extensionName) return false; } +cl_version get_extension_version(cl_device_id device, const char *extensionName) +{ + cl_int err; + size_t size; + + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, + &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return size\n"); + } + + std::vector extensions(size / sizeof(cl_name_version)); + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, size, + extensions.data(), &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return value\n"); + } + + for (auto &ext : extensions) + { + if (!strcmp(extensionName, ext.name)) + { + return ext.version; + } + } + + throw std::runtime_error("Extension " + std::string(extensionName) + + " not supported by device!"); +} + /* Returns a string containing the supported extensions list for a device. 
*/ std::string get_device_extensions_string(cl_device_id device) { @@ -86,3 +120,15 @@ std::string get_device_name(cl_device_id device) { return get_device_info_string(device, CL_DEVICE_NAME); } + +size_t get_max_param_size(cl_device_id device) +{ + size_t ret(0); + if (clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(ret), &ret, + nullptr) + != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo failed\n"); + } + return ret; +} diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h index af923a2fa4..912dd198ac 100644 --- a/test_common/harness/deviceInfo.h +++ b/test_common/harness/deviceInfo.h @@ -31,6 +31,11 @@ std::string get_device_info_string(cl_device_id device, /* Determines if an extension is supported by a device. */ int is_extension_available(cl_device_id device, const char *extensionName); +/* Returns the version of the extension the device supports or throws an + * exception if the extension is not supported by the device. */ +cl_version get_extension_version(cl_device_id device, + const char *extensionName); + /* Returns a string containing the supported extensions list for a device. */ std::string get_device_extensions_string(cl_device_id device); @@ -42,4 +47,8 @@ std::string get_device_version_string(cl_device_id device); /* Returns a string containing the device name. 
*/ std::string get_device_name(cl_device_id device); + +// Returns the maximum size in bytes for Kernel Parameters +size_t get_max_param_size(cl_device_id device); + #endif // _deviceInfo_h diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index c1d0602897..eaccf64119 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -18,9 +18,12 @@ #include #include +#include + #include "errorHelpers.h" #include "parseParameters.h" +#include "testHarness.h" #include @@ -68,6 +71,7 @@ const char *IGetErrorString(int clErrorCode) case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM"; case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; @@ -299,10 +303,6 @@ const char *GetQueuePropertyName(cl_command_queue_properties property) } } -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) -#endif - #if defined(_MSC_VER) #define scalbnf(_a, _i) ldexpf(_a, _i) #define scalbn(_a, _i) ldexp(_a, _i) @@ -355,7 +355,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference), HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -363,7 +363,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // reference is a normal power of two or a zero int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -435,7 +435,8 @@ float Ulp_Error(float test, double reference) return 0.0f; // if we are expecting a NaN, any NaN is fine // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference), FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -443,7 +444,8 @@ float Ulp_Error(float test, double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -511,7 +513,7 @@ float Ulp_Error_Double(double test, long double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error float 
result = (float)scalbnl(testVal - reference, ulp_exp); @@ -527,7 +529,7 @@ float Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); @@ -563,7 +565,7 @@ cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret); test_error(error, "Unable to query context's device size"); - num_devices = size_ret / sizeof(cl_device_id); + num_devices = static_cast(size_ret / sizeof(cl_device_id)); device_list = (cl_device_id *)malloc(size_ret); if (device_list == NULL) { @@ -622,7 +624,6 @@ cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, const char *subtests_to_skip_with_offline_compiler[] = { "get_kernel_arg_info", - "get_kernel_arg_info_compatibility", "binary_create", "load_program_source", "load_multistring_source", @@ -686,23 +687,23 @@ const char *subtests_to_skip_with_offline_compiler[] = { "unload_build_info", "unload_program_binaries", "features_macro", + "progvar_prog_scope_misc", + "library_function" }; -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device) +bool check_functions_for_offline_compiler(const char *subtestname) { if (gCompilationMode != kOnline) { - int nNotRequiredWithOfflineCompiler = - sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *); - size_t i; - for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i) + size_t nNotRequiredWithOfflineCompiler = + ARRAY_SIZE(subtests_to_skip_with_offline_compiler); + for (size_t i = 0; i < nNotRequiredWithOfflineCompiler; ++i) { if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) { - return 1; + return false; } } } - return 0; + 
return true; } diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index 1944601467..80eb3b58ef 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -56,17 +56,13 @@ static int vlog_win32(const char *format, ...); #define vlog printf #endif -#define ct_assert(b) ct_assert_i(b, __LINE__) -#define ct_assert_i(b, line) ct_assert_ii(b, line) -#define ct_assert_ii(b, line) \ - int _compile_time_assertion_on_line_##line[b ? 1 : -1]; - #define test_fail(msg, ...) \ { \ log_error(msg, ##__VA_ARGS__); \ return TEST_FAIL; \ } #define test_error(errCode, msg) test_error_ret(errCode, msg, errCode) +#define test_error_fail(errCode, msg) test_error_ret(errCode, msg, TEST_FAIL) #define test_error_ret(errCode, msg, retValue) \ { \ auto errCodeResult = errCode; \ @@ -97,21 +93,6 @@ static int vlog_win32(const char *format, ...); "the device version! (from %s:%d)\n", \ msg, __FILE__, __LINE__); -#define test_missing_support_offline_cmpiler(errCode, msg) \ - test_missing_support_offline_cmpiler_ret(errCode, msg, errCode) -// this macro should always return CL_SUCCESS, but print the skip message on -// test not supported with offline compiler -#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue) \ - { \ - if (errCode != CL_SUCCESS) \ - { \ - log_info("INFO: Subtest %s tests is not supported in offline " \ - "compiler execution path! (from %s:%d)\n", \ - msg, __FILE__, __LINE__); \ - return TEST_SKIP; \ - } \ - } - // expected error code vs. 
what we got #define test_failure_error(errCode, expectedErrCode, msg) \ test_failure_error_ret(errCode, expectedErrCode, msg, \ @@ -186,8 +167,7 @@ extern const char *GetAddressModeName(cl_addressing_mode mode); extern const char *GetQueuePropertyName(cl_command_queue_properties properties); extern const char *GetDeviceTypeName(cl_device_type type); -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device); +bool check_functions_for_offline_compiler(const char *subtestname); cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list); diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 40826c5c81..222aa2c40c 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -16,6 +16,8 @@ #ifndef _fpcontrol_h #define _fpcontrol_h +#include + // In order to get tests for correctly rounded operations (e.g. multiply) to // work properly we need to be able to set the reference hardware to FTZ mode if // the device hardware is running in that mode. We have explored all other @@ -30,7 +32,11 @@ // that rounding mode. 
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \ || defined(__MINGW32__) +#ifdef _MSC_VER typedef int FPU_mode_type; +#else +typedef int64_t FPU_mode_type; +#endif #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) #include @@ -39,7 +45,7 @@ typedef int FPU_mode_type; extern __thread fpu_control_t fpu_control; #endif // Set the reference hardware floating point unit to FTZ mode -static inline void ForceFTZ(FPU_mode_type *mode) +inline void ForceFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -55,7 +61,7 @@ static inline void ForceFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); @@ -65,7 +71,7 @@ static inline void ForceFTZ(FPU_mode_type *mode) } // Disable the denorm flush to zero -static inline void DisableFTZ(FPU_mode_type *mode) +inline void DisableFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -81,7 +87,7 @@ static inline void DisableFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); @@ -91,7 +97,7 @@ static inline void DisableFTZ(FPU_mode_type *mode) } // Restore the reference hardware to floating point state indicated by *mode -static inline void RestoreFPState(FPU_mode_type *mode) +inline void RestoreFPState(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) diff --git a/test_common/harness/imageHelpers.cpp 
b/test_common/harness/imageHelpers.cpp index 26110a474e..3a5c5533aa 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017,2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -269,7 +269,7 @@ int is_format_signed(const cl_image_format *format) } } -uint32_t get_pixel_size(cl_image_format *format) +uint32_t get_pixel_size(const cl_image_format *format) { switch (format->image_channel_data_type) { @@ -330,7 +330,7 @@ uint32_t next_power_of_two(uint32_t v) return v; } -uint32_t get_pixel_alignment(cl_image_format *format) +uint32_t get_pixel_alignment(const cl_image_format *format) { return next_power_of_two(get_pixel_size(format)); } @@ -408,6 +408,118 @@ int get_32_bit_image_format(cl_context context, cl_mem_object_type objType, return -1; } +void print_first_pixel_difference_error(size_t where, const char *sourcePixel, + const char *destPixel, + image_descriptor *imageInfo, size_t y, + size_t thirdDim) +{ + size_t pixel_size = get_pixel_size(imageInfo->format); + + log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d " + "pitch %d (extra %d bytes)\n", + (int)y, (int)imageInfo->width, (int)imageInfo->height, + (int)thirdDim, (int)imageInfo->rowPitch, + (int)imageInfo->rowPitch + - (int)imageInfo->width * (int)pixel_size); + log_error("Failed at column: %ld ", where); + + switch (pixel_size) + { + case 1: + log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0], + ((cl_uchar *)destPixel)[0]); + break; + case 2: + log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0], + ((cl_ushort *)destPixel)[0]); + break; + case 3: + log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. 
" + "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n", + ((cl_uchar *)sourcePixel)[0], + ((cl_uchar *)sourcePixel)[1], + ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0], + ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]); + break; + case 4: + log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0], + ((cl_uint *)destPixel)[0]); + break; + case 6: + log_error( + "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. " + "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n", + ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1], + ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0], + ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]); + break; + case 8: + log_error("*0x%16.16llx vs. 0x%16.16llx\n", + ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]); + break; + case 12: + log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. " + "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n", + ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1], + ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0], + ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]); + break; + case 16: + log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. 
" + "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n", + ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1], + ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3], + ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1], + ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]); + break; + default: + log_error("Don't know how to print pixel size of %ld\n", + pixel_size); + break; + } +} + +size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr) +{ + size_t pixel_size = get_pixel_size(imageInfo->format); + size_t column; + + for (column = 0; column < imageInfo->width; column++) + { + switch (imageInfo->format->image_channel_data_type) + { + // If the data type is 101010, then ignore bits 31 and 32 when + // comparing the row + case CL_UNORM_INT_101010: { + cl_uint aPixel = *(cl_uint *)aPtr; + cl_uint bPixel = *(cl_uint *)bPtr; + if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff)) + return column; + } + break; + + // If the data type is 555, ignore bit 15 when comparing the row + case CL_UNORM_SHORT_555: { + cl_ushort aPixel = *(cl_ushort *)aPtr; + cl_ushort bPixel = *(cl_ushort *)bPtr; + if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column; + } + break; + + default: + if (memcmp(aPtr, bPtr, pixel_size) != 0) return column; + break; + } + + aPtr += pixel_size; + bPtr += pixel_size; + } + + // If we didn't find a difference, return the width of the image + return column; +} + int random_log_in_range(int minV, int maxV, MTdata d) { double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1); @@ -483,8 +595,8 @@ struct AddressingTable { AddressingTable() { - ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6)); - ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2); + static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, ""); + static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, ""); mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE] [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn; @@ -533,7 
+645,7 @@ bool is_sRGBA_order(cl_channel_order image_channel_order) // Format helpers -int has_alpha(cl_image_format *format) +int has_alpha(const cl_image_format *format) { switch (format->image_channel_order) { @@ -550,6 +662,7 @@ int has_alpha(cl_image_format *format) case CL_RGBA: return 1; case CL_BGRA: return 1; case CL_ARGB: return 1; + case CL_ABGR: return 1; case CL_INTENSITY: return 1; case CL_LUMINANCE: return 0; #ifdef CL_BGR1_APPLE @@ -577,16 +690,13 @@ int has_alpha(cl_image_format *format) _b ^= _a; \ _a ^= _b; \ } while (0) -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b)) -#endif void get_max_sizes( size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE - cl_mem_object_type image_type, cl_image_format *format, + cl_mem_object_type image_type, const cl_image_format *format, int usingMaxPixelSizeBuffer) { @@ -647,7 +757,7 @@ void get_max_sizes( if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16; size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size; - log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, " + log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, " "per-allocation limit %gMB.\n", maxWidth, maxHeight, isArray ? 
maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0))); @@ -688,10 +798,10 @@ void get_max_sizes( if (image_type == CL_MEM_OBJECT_IMAGE1D) { - double M = maximum_sizes[0]; + size_t M = maximum_sizes[0]; // Store the size - sizes[(*numberOfSizes)][0] = (size_t)M; + sizes[(*numberOfSizes)][0] = M; sizes[(*numberOfSizes)][1] = 1; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); @@ -705,17 +815,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; int x0_dim = !fixed_dim; - double x0 = + size_t x0 = static_cast( fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M), - maximum_sizes[x0_dim]); + maximum_sizes[x0_dim])); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); } @@ -730,16 +840,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; // Find two other dimensions, x0 and x1 int x0_dim = (fixed_dim == 0) ? 1 : 0; int x1_dim = (fixed_dim == 2) ? 1 : 2; // Choose two other sizes for these dimensions - double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x0 = static_cast( + fmin(fmin(A / M, maximum_sizes[x0_dim]), + other_sizes[(other_size++) % num_other_sizes])); // GPUs have certain restrictions on minimum width (row alignment) // of images which has given us issues testing small widths in this // test (say we set width to 3 for testing, and compute size based @@ -748,8 +859,9 @@ void get_max_sizes( // width of 16 which doesnt fit in vram). 
For this purpose we are // not testing width < 16 for this test. if (x0_dim == 0 && x0 < 16) x0 = 16; - double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x1 = static_cast( + fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), + other_sizes[(other_size++) % num_other_sizes])); // Valid image sizes cannot be below 1. Due to the workaround for // the xo_dim where x0 is overidden to 16 there might not be enough @@ -762,9 +874,9 @@ void get_max_sizes( assert(x0 > 0 && M > 0); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; - sizes[(*numberOfSizes)][x1_dim] = (size_t)x1; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; + sizes[(*numberOfSizes)][x1_dim] = x1; ++(*numberOfSizes); } } @@ -775,20 +887,20 @@ void get_max_sizes( switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: - log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0], + log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: - log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: - log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); @@ -797,7 +909,7 @@ void get_max_sizes( } } -float get_max_absolute_error(cl_image_format *format, +float get_max_absolute_error(const cl_image_format *format, image_sampler_data *sampler) { if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f; @@ -816,7 
+928,7 @@ float get_max_absolute_error(cl_image_format *format, } } -float get_max_relative_error(cl_image_format *format, +float get_max_relative_error(const cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter) { @@ -899,7 +1011,7 @@ float get_max_relative_error(cl_image_format *format, return maxError; } -size_t get_format_max_int(cl_image_format *format) +size_t get_format_max_int(const cl_image_format *format) { switch (format->image_channel_data_type) { @@ -932,7 +1044,7 @@ size_t get_format_max_int(cl_image_format *format) } } -int get_format_min_int(cl_image_format *format) +int get_format_min_int(const cl_image_format *format) { switch (format->image_channel_data_type) { @@ -1052,12 +1164,13 @@ void escape_inf_nan_values(char *data, size_t allocSize) char *generate_random_image_data(image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d) { - size_t allocSize = get_image_size(imageInfo); + size_t allocSize = static_cast(get_image_size(imageInfo)); size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format); size_t i; if (imageInfo->num_mip_levels > 1) - allocSize = compute_mipmapped_image_size(*imageInfo); + allocSize = + static_cast(compute_mipmapped_image_size(*imageInfo)); #if defined(__APPLE__) char *data = NULL; @@ -1089,7 +1202,7 @@ char *generate_random_image_data(image_descriptor *imageInfo, if (data == NULL) { - log_error("ERROR: Unable to malloc %lu bytes for " + log_error("ERROR: Unable to malloc %zu bytes for " "generate_random_image_data\n", allocSize); return 0; @@ -1247,7 +1360,7 @@ void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x, return; } - cl_image_format *format = imageInfo->format; + const cl_image_format *format = imageInfo->format; unsigned int i; float tempData[4]; @@ -1416,6 +1529,12 @@ void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x, outData[2] = tempData[3]; outData[3] = tempData[0]; break; + case CL_ABGR: + outData[0] = 
tempData[3]; + outData[1] = tempData[2]; + outData[2] = tempData[1]; + outData[3] = tempData[0]; + break; case CL_BGRA: case CL_sBGRA: outData[0] = tempData[2]; @@ -1600,24 +1719,26 @@ bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset, // At this point, we're dealing with non-normalized coordinates. - outX = adFn(floorf(x), width); + outX = adFn(static_cast(floorf(x)), width); // 1D and 2D arrays require special care for the index coordinate: switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f); - outZ = 0.0f; /* don't care! */ + outY = static_cast( + calculate_array_index(y, (float)imageInfo->arraySize - 1.0f)); + outZ = 0; /* don't care! */ break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - outY = adFn(floorf(y), height); - outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f); + outY = adFn(static_cast(floorf(y)), height); + outZ = static_cast( + calculate_array_index(z, (float)imageInfo->arraySize - 1.0f)); break; default: // legacy path: - if (height != 0) outY = adFn(floorf(y), height); - if (depth != 0) outZ = adFn(floorf(z), depth); + if (height != 0) outY = adFn(static_cast(floorf(y)), height); + if (depth != 0) outZ = adFn(static_cast(floorf(z)), depth); } return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ); @@ -1688,7 +1809,7 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, switch (addressing_mode) { case CL_ADDRESS_REPEAT: - ret = RepeatNormalizedAddressFn(coord, extent); + ret = RepeatNormalizedAddressFn(coord, static_cast(extent)); if (verbose) { @@ -1712,7 +1833,8 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, break; case CL_ADDRESS_MIRRORED_REPEAT: - ret = MirroredRepeatNormalizedAddressFn(coord, extent); + ret = MirroredRepeatNormalizedAddressFn( + coord, static_cast(extent)); if (verbose) { @@ -1890,13 +2012,13 @@ FloatPixel 
sample_image_pixel_float_offset( // coordinates. Note that the array cases again require special // care, per section 8.4 in the OpenCL 1.2 Specification. - ix = adFn(floorf(x), width_lod); + ix = adFn(static_cast(floorf(x)), width_lod); switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - iy = - calculate_array_index(y, (float)(imageInfo->arraySize - 1)); + iy = static_cast(calculate_array_index( + y, (float)(imageInfo->arraySize - 1))); iz = 0; if (verbose) { @@ -1904,18 +2026,18 @@ FloatPixel sample_image_pixel_float_offset( } break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - iy = adFn(floorf(y), height_lod); - iz = - calculate_array_index(z, (float)(imageInfo->arraySize - 1)); + iy = adFn(static_cast(floorf(y)), height_lod); + iz = static_cast(calculate_array_index( + z, (float)(imageInfo->arraySize - 1))); if (verbose) { log_info("\tArray index %f evaluates to %d\n", z, iz); } break; default: - iy = adFn(floorf(y), height_lod); + iy = adFn(static_cast(floorf(y)), height_lod); if (depth_lod != 0) - iz = adFn(floorf(z), depth_lod); + iz = adFn(static_cast(floorf(z)), depth_lod); else iz = 0; } @@ -1969,16 +2091,16 @@ FloatPixel sample_image_pixel_float_offset( height = 1; } - int x1 = adFn(floorf(x - 0.5f), width); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width); int y1 = 0; - int x2 = adFn(floorf(x - 0.5f) + 1, width); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width); int y2 = 0; if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) { - y1 = adFn(floorf(y - 0.5f), height); - y2 = adFn(floorf(y - 0.5f) + 1, height); + y1 = adFn(static_cast(floorf(y - 0.5f)), height); + y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height); } else { @@ -2069,12 +2191,12 @@ FloatPixel sample_image_pixel_float_offset( else { // 3D linear filtering - int x1 = adFn(floorf(x - 0.5f), width_lod); - int y1 = adFn(floorf(y - 0.5f), height_lod); - int z1 = adFn(floorf(z - 
0.5f), depth_lod); - int x2 = adFn(floorf(x - 0.5f) + 1, width_lod); - int y2 = adFn(floorf(y - 0.5f) + 1, height_lod); - int z2 = adFn(floorf(z - 0.5f) + 1, depth_lod); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width_lod); + int y1 = adFn(static_cast(floorf(y - 0.5f)), height_lod); + int z1 = adFn(static_cast(floorf(z - 0.5f)), depth_lod); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width_lod); + int y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height_lod); + int z2 = adFn(static_cast(floorf(z - 0.5f) + 1), depth_lod); if (verbose) log_info("\tActual integer coords used (i = floor(x-.5)): " @@ -2398,6 +2520,14 @@ void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat) srcVector[1] = srcVector[0]; srcVector[0] = temp; break; + case CL_ABGR: + temp = srcVector[3]; + srcVector[3] = srcVector[0]; + srcVector[0] = temp; + temp = srcVector[2]; + srcVector[2] = srcVector[1]; + srcVector[1] = temp; + break; case CL_BGRA: case CL_sBGRA: temp = srcVector[0]; @@ -2813,15 +2943,18 @@ void pack_image_pixel_error(const float *srcVector, case CL_UNSIGNED_INT8: { const cl_uchar *ptr = (const cl_uchar *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX)); break; } case CL_UNSIGNED_INT16: { const cl_ushort *ptr = (const cl_ushort *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, + CL_USHRT_MAX)); break; } case CL_UNSIGNED_INT32: { @@ -2917,7 +3050,7 @@ int DetectFloatToHalfRoundingMode( } // Create our program, and a kernel - const char *kernel[1] = { + const char *kernelSource[1] = { "kernel void detect_round( global float4 *in, write_only image2d_t " "out 
)\n" "{\n" @@ -2927,8 +3060,9 @@ int DetectFloatToHalfRoundingMode( }; clProgramWrapper program; - err = create_single_kernel_helper_create_program(context, &program, 1, - kernel); + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &program, &kernel, 1, + kernelSource, "detect_round"); if (NULL == program || err) { @@ -2953,29 +3087,7 @@ int DetectFloatToHalfRoundingMode( return err; } - err = clBuildProgram(program, 1, &device, "", NULL, NULL); - if (err) - { - log_error("Error: could not build program in " - "DetectFloatToHalfRoundingMode (%d)", - err); - clReleaseMemObject(inBuf); - clReleaseMemObject(outImage); - return err; - } - - cl_kernel k = clCreateKernel(program, "detect_round", &err); - if (NULL == k || err) - { - log_error("Error: could not create kernel in " - "DetectFloatToHalfRoundingMode (%d)", - err); - clReleaseMemObject(inBuf); - clReleaseMemObject(outImage); - return err; - } - - err = clSetKernelArg(k, 0, sizeof(cl_mem), &inBuf); + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf); if (err) { log_error("Error: could not set argument 0 of kernel in " @@ -2983,11 +3095,10 @@ int DetectFloatToHalfRoundingMode( err); clReleaseMemObject(inBuf); clReleaseMemObject(outImage); - clReleaseKernel(k); return err; } - err = clSetKernelArg(k, 1, sizeof(cl_mem), &outImage); + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage); if (err) { log_error("Error: could not set argument 1 of kernel in " @@ -2995,14 +3106,13 @@ int DetectFloatToHalfRoundingMode( err); clReleaseMemObject(inBuf); clReleaseMemObject(outImage); - clReleaseKernel(k); return err; } // Run the kernel size_t global_work_size = count; - err = clEnqueueNDRangeKernel(q, k, 1, NULL, &global_work_size, NULL, 0, - NULL, NULL); + err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size, + NULL, 0, NULL, NULL); if (err) { log_error("Error: could not enqueue kernel in " @@ -3010,7 +3120,6 @@ int DetectFloatToHalfRoundingMode( err); 
clReleaseMemObject(inBuf); clReleaseMemObject(outImage); - clReleaseKernel(k); return err; } @@ -3028,7 +3137,6 @@ int DetectFloatToHalfRoundingMode( err); clReleaseMemObject(inBuf); clReleaseMemObject(outImage); - clReleaseKernel(k); return err; } @@ -3083,7 +3191,6 @@ int DetectFloatToHalfRoundingMode( // clean up clReleaseMemObject(inBuf); clReleaseMemObject(outImage); - clReleaseKernel(k); return err; } @@ -3168,7 +3275,7 @@ char *create_random_image_data(ExplicitType dataType, if (data == NULL) { log_error( - "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", + "ERROR: Unable to malloc %zu bytes for create_random_image_data\n", allocSize); return NULL; } @@ -3597,8 +3704,8 @@ cl_float CoordWalker::Get(size_t idx, size_t el) } -void print_read_header(cl_image_format *format, image_sampler_data *sampler, - bool err, int t) +void print_read_header(const cl_image_format *format, + image_sampler_data *sampler, bool err, int t) { const char *addressMode = NULL; const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" }; @@ -3664,7 +3771,7 @@ void print_read_header(cl_image_format *format, image_sampler_data *sampler, } } -void print_write_header(cl_image_format *format, bool err = false) +void print_write_header(const cl_image_format *format, bool err = false) { if (err) log_error("[%-7s %-24s %d]\n", @@ -3679,7 +3786,7 @@ void print_write_header(cl_image_format *format, bool err = false) } -void print_header(cl_image_format *format, bool err = false) +void print_header(const cl_image_format *format, bool err = false) { if (err) { @@ -3928,7 +4035,8 @@ bool is_image_format_required(cl_image_format format, cl_mem_flags flags, cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth) { - cl_uint retMaxMipLevels = 0, max_dim = 0; + cl_uint retMaxMipLevels = 0; + size_t max_dim = 0; max_dim = width; max_dim = height > max_dim ? 
height : max_dim; diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index 87595094eb..e728a939c2 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -76,11 +76,11 @@ int round_to_even(float v); #define CONVERT_UINT(v, max, max_val) \ (v < 0 ? 0 : (v > max ? max_val : round_to_even(v))) -extern void print_read_header(cl_image_format *format, +extern void print_read_header(const cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0); -extern void print_write_header(cl_image_format *format, bool err); -extern void print_header(cl_image_format *format, bool err); +extern void print_write_header(const cl_image_format *format, bool err); +extern void print_header(const cl_image_format *format, bool err); extern bool find_format(cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind); extern bool is_image_format_required(cl_image_format format, cl_mem_flags flags, @@ -98,7 +98,7 @@ extern uint32_t get_channel_order_channel_count(cl_channel_order order); cl_channel_type get_channel_type_from_name(const char *name); cl_channel_order get_channel_order_from_name(const char *name); extern int is_format_signed(const cl_image_format *format); -extern uint32_t get_pixel_size(cl_image_format *format); +extern uint32_t get_pixel_size(const cl_image_format *format); /* Helper to get any ol image format as long as it is 8-bits-per-channel */ extern int get_8_bit_image_format(cl_context context, @@ -123,7 +123,7 @@ typedef struct size_t rowPitch; size_t slicePitch; size_t arraySize; - cl_image_format *format; + const cl_image_format *format; cl_mem buffer; cl_mem_object_type type; cl_uint num_mip_levels; @@ -134,14 +134,22 @@ typedef struct float p[4]; } FloatPixel; +void print_first_pixel_difference_error(size_t where, const char *sourcePixel, + const char *destPixel, + image_descriptor *imageInfo, size_t y, + size_t thirdDim); + +size_t 
compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr); + void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, - cl_mem_object_type image_type, cl_image_format *format, + cl_mem_object_type image_type, const cl_image_format *format, int usingMaxPixelSize = 0); -extern size_t get_format_max_int(cl_image_format *format); +extern size_t get_format_max_int(const cl_image_format *format); extern cl_ulong get_image_size(image_descriptor const *imageInfo); extern cl_ulong get_image_size_mb(image_descriptor const *imageInfo); @@ -173,7 +181,7 @@ extern void copy_image_data(image_descriptor *srcImageInfo, void *destImageValues, const size_t sourcePos[], const size_t destPos[], const size_t regionSize[]); -int has_alpha(cl_image_format *format); +int has_alpha(const cl_image_format *format); extern bool is_sRGBA_order(cl_channel_order image_channel_order); @@ -240,7 +248,7 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x, return; } - cl_image_format *format = imageInfo->format; + const cl_image_format *format = imageInfo->format; unsigned int i; T tempData[4]; @@ -484,12 +492,14 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x, } else if (format->image_channel_order == CL_INTENSITY) { + outData[0] = tempData[0]; outData[1] = tempData[0]; outData[2] = tempData[0]; outData[3] = tempData[0]; } else if (format->image_channel_order == CL_LUMINANCE) { + outData[0] = tempData[0]; outData[1] = tempData[0]; outData[2] = tempData[0]; } @@ -662,9 +672,9 @@ extern char *create_random_image_data(ExplicitType dataType, extern void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine); -extern float get_max_absolute_error(cl_image_format *format, +extern float get_max_absolute_error(const 
cl_image_format *format, image_sampler_data *sampler); -extern float get_max_relative_error(cl_image_format *format, +extern float get_max_relative_error(const cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter); diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h new file mode 100644 index 0000000000..ad7b303b47 --- /dev/null +++ b/test_common/harness/integer_ops_test_info.h @@ -0,0 +1,92 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef INTEGER_OPS_TEST_INFO_H +#define INTEGER_OPS_TEST_INFO_H + +#include "conversions.h" +#include "testHarness.h" + +// TODO: expand usage to other tests. 
+ +template struct TestInfo +{ +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kChar; + static constexpr const char* deviceTypeName = "char"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUChar; + static constexpr const char* deviceTypeName = "uchar"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kShort; + static constexpr const char* deviceTypeName = "short"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUShort; + static constexpr const char* deviceTypeName = "ushort"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kInt; + static constexpr const char* deviceTypeName = "int"; + static constexpr const char* deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUInt; + static constexpr const char* deviceTypeName = "uint"; + static constexpr const char* deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kLong; + static constexpr const char* deviceTypeName = "long"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = 
kULong; + static constexpr const char* deviceTypeName = "ulong"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; + +template +static void fill_vector_with_random_data(std::vector& v) +{ + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, v.size(), d, v.data()); +} + +#endif /* INTEGER_OPS_TEST_INFO_H */ diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 6ccdcc6e6f..1d1f8d8c4c 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -312,57 +312,6 @@ get_compilation_mode_str(const CompilationMode compilationMode) } } -#ifdef KHRONOS_OFFLINE_COMPILER -static std::string -get_khronos_compiler_command(const cl_uint device_address_space_size, - const bool openclCXX, const std::string &bOptions, - const std::string &sourceFilename, - const std::string &outputFilename) -{ - // Set compiler options - // Emit SPIR-V - std::string compilerOptions = " -cc1 -emit-spirv"; - // : for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V - // use spir64-unknown-unknown. 
- if (device_address_space_size == 32) - { - compilerOptions += " -triple=spir-unknown-unknown"; - } - else - { - compilerOptions += " -triple=spir64-unknown-unknown"; - } - // Set OpenCL C++ flag required by SPIR-V-ready clang (compiler provided by - // Khronos) - if (openclCXX) - { - compilerOptions = compilerOptions + " -cl-std=c++"; - } - // Set correct includes - if (openclCXX) - { - compilerOptions += " -I "; - compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR); - } - else - { - compilerOptions += " -include opencl.h"; - } - -#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS - compilerOptions += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS); -#endif - - // Add build options passed to this function - compilerOptions += " " + bOptions; - compilerOptions += " " + sourceFilename + " -o " + outputFilename; - std::string runString = - STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions; - - return runString; -} -#endif // KHRONOS_OFFLINE_COMPILER - static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size, const CompilationMode compilationMode, @@ -476,50 +425,28 @@ static int invoke_offline_compiler(const cl_device_id device, const CompilationMode compilationMode, const std::string &bOptions, const std::string &sourceFilename, - const std::string &outputFilename, - const bool openclCXX) + const std::string &outputFilename) { std::string runString; - if (openclCXX) - { -#ifndef KHRONOS_OFFLINE_COMPILER - log_error("CL C++ compilation is not possible: " - "KHRONOS_OFFLINE_COMPILER was not defined.\n"); - return CL_INVALID_OPERATION; -#else - if (compilationMode != kSpir_v) - { - log_error("Compilation mode must be SPIR-V for Khronos compiler"); - return -1; - } - runString = get_khronos_compiler_command( - device_address_space_size, openclCXX, bOptions, sourceFilename, - outputFilename); -#endif - } - else - { - std::string clDeviceInfoFilename; - - // See cl_offline_compiler-interface.txt for a description of the - 
// format of the CL device information file generated below, and - // the internal command line interface for invoking the offline - // compiler. + std::string clDeviceInfoFilename; - cl_int err = - write_cl_device_info(device, device_address_space_size, - compilationMode, clDeviceInfoFilename); - if (err != CL_SUCCESS) - { - log_error("Failed writing CL device info file\n"); - return err; - } + // See cl_offline_compiler-interface.txt for a description of the + // format of the CL device information file generated below, and + // the internal command line interface for invoking the offline + // compiler. - runString = get_offline_compilation_command( - device_address_space_size, compilationMode, bOptions, - sourceFilename, outputFilename, clDeviceInfoFilename); + cl_int err = write_cl_device_info(device, device_address_space_size, + compilationMode, clDeviceInfoFilename); + if (err != CL_SUCCESS) + { + log_error("Failed writing CL device info file\n"); + return err; } + runString = get_offline_compilation_command( + device_address_space_size, compilationMode, bOptions, sourceFilename, + outputFilename, clDeviceInfoFilename); + // execute script log_info("Executing command: %s\n", runString.c_str()); fflush(stdout); @@ -577,9 +504,8 @@ static cl_int get_device_address_bits(const cl_device_id device, static int get_offline_compiler_output( std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize, - const bool openclCXX, const CompilationMode compilationMode, - const std::string &bOptions, const std::string &kernelPath, - const std::string &kernelNamePrefix) + const CompilationMode compilationMode, const std::string &bOptions, + const std::string &kernelPath, const std::string &kernelNamePrefix) { std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix); @@ -599,12 +525,12 @@ static int get_offline_compiler_output( } else { - int error = invoke_offline_compiler( - device, deviceAddrSpaceSize, compilationMode, 
bOptions, - sourceFilename, outputFilename, openclCXX); + int error = invoke_offline_compiler(device, deviceAddrSpaceSize, + compilationMode, bOptions, + sourceFilename, outputFilename); if (error != CL_SUCCESS) return error; - // read output file + // open output file for reading ifs.open(outputFilename.c_str(), std::ios::binary); if (!ifs.good()) { @@ -614,14 +540,33 @@ static int get_offline_compiler_output( } } } + + if (compilationMode == kSpir_v && !gDisableSPIRVValidation) + { + std::string runString = gSPIRVValidator + " " + outputFilename; + + int returnCode = system(runString.c_str()); + if (returnCode == -1) + { + log_error("Error: failed to invoke SPIR-V validator\n"); + return CL_COMPILE_PROGRAM_FAILURE; + } + else if (returnCode != 0) + { + log_error( + "Failed to validate SPIR-V file %s: system() returned 0x%x\n", + outputFilename.c_str(), returnCode); + return CL_COMPILE_PROGRAM_FAILURE; + } + } + return CL_SUCCESS; } static int create_single_kernel_helper_create_program_offline( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char *const *kernelProgram, - const char *buildOptions, const bool openclCXX, - CompilationMode compilationMode) + const char *buildOptions, CompilationMode compilationMode) { if (kCacheModeDumpCl == gCompilationCacheMode) { @@ -649,24 +594,12 @@ static int create_single_kernel_helper_create_program_offline( std::ifstream ifs; error = get_offline_compiler_output(ifs, device, device_address_space_size, - openclCXX, compilationMode, bOptions, + compilationMode, bOptions, gCompilationCachePath, kernelName); if (error != CL_SUCCESS) return error; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT -// ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && 
defined(ONLY_SPIRV_COMPILATION) - if (openclCXX) - { - return CL_SUCCESS; - } -#endif - ifs.seekg(0, ifs.end); - int length = ifs.tellg(); + size_t length = static_cast(ifs.tellg()); ifs.seekg(0, ifs.beg); // treat modifiedProgram as input for clCreateProgramWithBinary @@ -748,8 +681,7 @@ static int create_single_kernel_helper_create_program_offline( static int create_single_kernel_helper_create_program( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, - const char *buildOptions, const bool openclCXX, - CompilationMode compilationMode) + const char *buildOptions, CompilationMode compilationMode) { std::lock_guard compiler_lock(gCompilerMutex); @@ -787,37 +719,39 @@ static int create_single_kernel_helper_create_program( { return create_single_kernel_helper_create_program_offline( context, device, outProgram, numKernelLines, kernelProgram, - buildOptions, openclCXX, compilationMode); + buildOptions, compilationMode); } } -int create_single_kernel_helper_create_program( - cl_context context, cl_program *outProgram, unsigned int numKernelLines, - const char **kernelProgram, const char *buildOptions, const bool openclCXX) +int create_single_kernel_helper_create_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions) { return create_single_kernel_helper_create_program( context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, - openclCXX, gCompilationMode); + gCompilationMode); } int create_single_kernel_helper_create_program_for_device( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, - const char *buildOptions, const bool openclCXX) + const char *buildOptions) { return create_single_kernel_helper_create_program( context, device, outProgram, numKernelLines, kernelProgram, - buildOptions, openclCXX, gCompilationMode); + 
buildOptions, gCompilationMode); } int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, - const char *kernelName, const char *buildOptions, const bool openclCXX) + const char *kernelName, const char *buildOptions) { return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, - kernelName, buildOptions, openclCXX); + kernelName, buildOptions); } // Creates and builds OpenCL C/C++ program, and creates a kernel @@ -826,7 +760,7 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, - const char *buildOptions, const bool openclCXX) + const char *buildOptions) { // For the logic that automatically adds -cl-std it is much cleaner if the // build options have RAII. This buffer will store the potentially updated @@ -865,51 +799,14 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram, build_options_internal += cl_std; buildOptions = build_options_internal.c_str(); } - int error; - // Create OpenCL C++ program - if (openclCXX) - { -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT -// ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - // Save global variable - bool tempgCompilationCacheMode = gCompilationCacheMode; - // Force OpenCL C++ -> SPIR-V compilation on every run - gCompilationCacheMode = kCacheModeOverwrite; -#endif - error = create_openclcpp_program(context, outProgram, numKernelLines, - kernelProgram, buildOptions); - if (error != CL_SUCCESS) - { - log_error("Create program failed: %d, line: %d\n", error, __LINE__); - 
return error; - } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT -// ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - // Restore global variables - gCompilationCacheMode = tempgCompilationCacheMode; - log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", - kernelName); - return error; -#endif - } - // Create OpenCL C program - else + int error = create_single_kernel_helper_create_program( + context, outProgram, numKernelLines, kernelProgram, buildOptions); + if (error != CL_SUCCESS) { - error = create_single_kernel_helper_create_program( - context, outProgram, numKernelLines, kernelProgram, buildOptions); - if (error != CL_SUCCESS) - { - log_error("Create program failed: %d, line: %d\n", error, __LINE__); - return error; - } + log_error("Create program failed: %d, line: %d\n", error, __LINE__); + return error; } + // Remove offline-compiler-only build options std::string newBuildOptions; if (buildOptions != NULL) @@ -930,18 +827,6 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram, kernelName, newBuildOptions.c_str()); } -// Creates OpenCL C++ program -int create_openclcpp_program(cl_context context, cl_program *outProgram, - unsigned int numKernelLines, - const char **kernelProgram, - const char *buildOptions) -{ - // Create program - return create_single_kernel_helper_create_program( - context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, - true, kSpir_v); -} - // Builds OpenCL C/C++ program and creates int build_program_create_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, @@ -1361,7 +1246,7 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags, list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); if (NULL == list) { - 
log_error("Error: unable to allocate %ld byte buffer for image format " + log_error("Error: unable to allocate %zu byte buffer for image format " "list at %s:%d (err = %d)\n", count * sizeof(cl_image_format), __FILE__, __LINE__, err); return 0; @@ -1822,7 +1707,7 @@ Version get_max_OpenCL_C_for_context(cl_context context) else { current_version = - (std::min)(device_version, current_version); + std::min(device_version, current_version); } }); return current_version; @@ -1891,3 +1776,26 @@ bool poll_until(unsigned timeout_ms, unsigned interval_ms, return ret; } + +bool device_supports_double(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp64")) + { + return true; + } + else + { + cl_device_fp_config double_fp_config; + cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, + sizeof(double_fp_config), + &double_fp_config, nullptr); + test_error(err, + "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); + return double_fp_config != 0; + } +} + +bool device_supports_half(cl_device_id device) +{ + return is_extension_available(device, "cl_khr_fp16"); +} diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h index d10d44ed2f..4d8f2a8fa7 100644 --- a/test_common/harness/kernelHelpers.h +++ b/test_common/harness/kernelHelpers.h @@ -72,24 +72,21 @@ extern int create_single_kernel_helper(cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, - const char *buildOptions = NULL, - const bool openclCXX = false); + const char *buildOptions = NULL); extern int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, - const char *kernelName, const char *buildOptions, - const bool openclCXX = false); + const char *kernelName, const char *buildOptions); extern int create_single_kernel_helper_create_program( 
cl_context context, cl_program *outProgram, unsigned int numKernelLines, - const char **kernelProgram, const char *buildOptions = NULL, - const bool openclCXX = false); + const char **kernelProgram, const char *buildOptions = NULL); extern int create_single_kernel_helper_create_program_for_device( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, - const char *buildOptions = NULL, const bool openclCXX = false); + const char *buildOptions = NULL); /* Creates OpenCL C++ program. This one must be used for creating OpenCL C++ * program. */ @@ -181,7 +178,7 @@ cl_device_fp_config get_default_rounding_mode(cl_device_id device); } #define PASSIVE_REQUIRE_FP16_SUPPORT(device) \ - if (!is_extension_available(device, "cl_khr_fp16")) \ + if (!device_supports_half(device)) \ { \ log_info( \ "\n\tNote: device does not support fp16. Skipping test...\n"); \ @@ -211,4 +208,10 @@ bool device_supports_cl_c_version(cl_device_id device, Version version); bool poll_until(unsigned timeout_ms, unsigned interval_ms, std::function fn); +// Checks whether the device supports double data types +bool device_supports_double(cl_device_id device); + +// Checks whether the device supports half data types +bool device_supports_half(cl_device_id device); + #endif // _kernelHelpers_h diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index c32d9bac6a..f5665deb23 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -277,3 +277,5 @@ double genrand_res53(MTdata d) unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6; return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0); } + +bool genrand_bool(MTdata d) { return ((cl_uint)genrand_int32(d) & 1); } diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h index 35c84933f8..98eec84352 100644 --- a/test_common/harness/mt19937.h +++ b/test_common/harness/mt19937.h @@ -90,6 +90,9 @@ double 
genrand_res53(MTdata /*data*/); #ifdef __cplusplus +/* generates a random boolean */ +bool genrand_bool(MTdata /*data*/); + #include struct MTdataHolder diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp index cd350cf831..8fc911083b 100644 --- a/test_common/harness/os_helpers.cpp +++ b/test_common/harness/os_helpers.cpp @@ -333,9 +333,6 @@ std::string exe_dir() #include -#if defined(max) -#undef max -#endif #include #include @@ -404,7 +401,8 @@ std::string exe_path() for (;;) { - DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size()); + DWORD len = GetModuleFileNameA(NULL, &path.front(), + static_cast(path.size())); if (len == 0) { diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp index b2ab5b0223..e946d744a4 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -28,11 +28,14 @@ using namespace std; #define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler" +#define DEFAULT_SPIRV_VALIDATOR "spirv-val" CompilationMode gCompilationMode = kOnline; CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent; std::string gCompilationCachePath = "."; std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM; +bool gDisableSPIRVValidation = false; +std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR; void helpInfo() { @@ -62,7 +65,14 @@ For offline compilation (binary and spir-v modes) only: Path for offline compiler output and CL source --compilation-program Program to use for offline compilation, defaults to: - )" DEFAULT_COMPILATION_PROGRAM "\n\n"); + )" DEFAULT_COMPILATION_PROGRAM R"( + +For spir-v mode only: + --disable-spirv-validation + Disable validation of SPIR-V using the SPIR-V validator + --spirv-validator + Path for SPIR-V validator, defaults to )" DEFAULT_SPIRV_VALIDATOR "\n" + "\n"); } int parseCustomParam(int argc, const char *argv[], const char *ignore) @@ -198,6 +208,26 @@ int parseCustomParam(int argc, 
const char *argv[], const char *ignore) return -1; } } + else if (!strcmp(argv[i], "--disable-spirv-validation")) + { + delArg++; + gDisableSPIRVValidation = true; + } + else if (!strcmp(argv[i], "--spirv-validator")) + { + delArg++; + if ((i + 1) < argc) + { + delArg++; + gSPIRVValidator = argv[i + 1]; + } + else + { + log_error("Program argument for --spirv-validator was not " + "specified.\n"); + return -1; + } + } // cleaning parameters from argv tab for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg]; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index b0f8328a14..437e12f94d 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ -38,6 +38,8 @@ extern CompilationMode gCompilationMode; extern CompilationCacheMode gCompilationCacheMode; extern std::string gCompilationCachePath; extern std::string gCompilationProgram; +extern bool gDisableSPIRVValidation; +extern std::string gSPIRVValidator; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp index 3157ca8091..e368f9b61b 100644 --- a/test_common/harness/propertyHelpers.cpp +++ b/test_common/harness/propertyHelpers.cpp @@ -97,15 +97,16 @@ int compareProperties(const std::vector& queried, if (!found) { - log_error("ERROR: expected property 0x%x not found!\n", + log_error("ERROR: expected property 0x%llx not found!\n", check_prop); return TEST_FAIL; } else if (check_value != queried_value) { - log_error("ERROR: mis-matched value for property 0x%x: wanted " - "0x%x, got 0x%x\n", - check_prop, check_value, queried_value); + log_error( + "ERROR: mis-matched value for property 0x%llx: wanted " + "0x%llx, got 0x%llx\n", + check_prop, check_value, queried_value); return TEST_FAIL; } } @@ -113,7 +114,7 @@ int compareProperties(const std::vector& queried, if (queried.size() > check.size()) { 
log_error("ERROR: all properties found but there are extra " - "properties: expected %d, got %d.\n", + "properties: expected %zu, got %zu.\n", check.size(), queried.size()); return TEST_FAIL; } diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index 681ccdd8ad..1f531478cf 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -48,7 +48,7 @@ RoundingMode set_round(RoundingMode r, Type outType) const int *p = int_rounds; if (outType == kfloat || outType == kdouble) p = flt_rounds; - int fpscr = 0; + int64_t fpscr = 0; RoundingMode oldRound = get_round(); _FPU_GETCW(fpscr); @@ -59,7 +59,7 @@ RoundingMode set_round(RoundingMode r, Type outType) RoundingMode get_round(void) { - int fpscr; + int64_t fpscr; int oldRound; _FPU_GETCW(fpscr); @@ -203,13 +203,13 @@ void *FlushToZero(void) #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) union { - int i; + unsigned int i; void *p; } u = { _mm_getcsr() }; _mm_setcsr(u.i | 0x8040); return u.p; #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr | FPSCR_FZ); return NULL; @@ -239,7 +239,7 @@ void UnFlushToZero(void *p) } u = { p }; _mm_setcsr(u.i); #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr & ~FPSCR_FZ); #elif defined(__PPC__) diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h index 064a3a63a8..6f52f0a00b 100644 --- a/test_common/harness/rounding_mode.h +++ b/test_common/harness/rounding_mode.h @@ -16,8 +16,6 @@ #ifndef __ROUNDING_MODE_H__ #define __ROUNDING_MODE_H__ -#pragma STDC FENV_ACCESS ON - #include "compat.h" #if (defined(_WIN32) && defined(_MSC_VER)) diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index b2516331b7..b3863918da 100644 --- 
a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -480,6 +480,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, case TEST_PASS: break; case TEST_FAIL: return fail_init_info(testNum); case TEST_SKIP: return skip_init_info(testNum); + case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); } } } @@ -493,6 +494,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, case TEST_PASS: break; case TEST_FAIL: return fail_init_info(testNum); case TEST_SKIP: return skip_init_info(testNum); + case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); } } @@ -711,20 +713,20 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], ret = saveResultsToJson(filename, argv[0], testList, selectedTestList, resultTestList, testNum); } - } - if (std::any_of(resultTestList, resultTestList + testNum, - [](test_status result) { - switch (result) - { - case TEST_PASS: - case TEST_SKIP: return false; - case TEST_FAIL: - default: return true; - }; - })) - { - ret = EXIT_FAILURE; + if (std::any_of(resultTestList, resultTestList + testNum, + [](test_status result) { + switch (result) + { + case TEST_PASS: + case TEST_SKIP: return false; + case TEST_FAIL: + default: return true; + }; + })) + { + ret = EXIT_FAILURE; + } } free(selectedTestList); @@ -781,6 +783,14 @@ test_status callSingleTestFunction(test_definition test, return TEST_SKIP; } + if (!check_functions_for_offline_compiler(test.name)) + { + log_info("Subtest %s tests is not supported in offline compiler " + "execution path!\n", + test.name); + return TEST_SKIP; + } + /* Create a context to work with, unless we're told not to */ if (!forceNoContextCreation) { @@ -810,14 +820,12 @@ test_status callSingleTestFunction(test_definition test, if (queue == NULL) { print_error(error, "Unable to create testing command queue"); + clReleaseContext(context); return TEST_FAIL; } } /* Run the test and print the result */ - error = 
check_functions_for_offline_compiler(test.name, deviceToUse); - test_missing_support_offline_cmpiler(error, test.name); - if (test.func == NULL) { // Skip unimplemented test, can happen when all of the tests are @@ -859,7 +867,7 @@ test_status callSingleTestFunction(test_definition test, int error = clFinish(queue); if (error) { - log_error("clFinish failed: %d", error); + log_error("clFinish failed: %s\n", IGetErrorString(error)); status = TEST_FAIL; } clReleaseCommandQueue(queue); @@ -1159,6 +1167,15 @@ test_status check_spirv_compilation_readiness(cl_device_id device) return TEST_PASS; } +cl_platform_id getPlatformFromDevice(cl_device_id deviceID) +{ + cl_platform_id platform = nullptr; + cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), + &platform, nullptr); + ASSERT_SUCCESS(err, "clGetDeviceInfo"); + return platform; +} + void PrintArch(void) { vlog("sizeof( void*) = %ld\n", sizeof(void *)); diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index d681616a55..d6054de981 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -80,6 +80,7 @@ typedef enum test_status TEST_PASS = 0, TEST_FAIL = 1, TEST_SKIP = 2, + TEST_SKIPPED_ITSELF = -100, } test_status; extern int gFailCount; @@ -177,6 +178,8 @@ extern int gHasLong; // This is set to 1 if the device suppots long and ulong // types in OpenCL C. extern bool gCoreILProgram; +extern cl_platform_id getPlatformFromDevice(cl_device_id deviceID); + #if !defined(__APPLE__) void memset_pattern4(void *, const void *, size_t); #endif diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp deleted file mode 100644 index 875ee59b92..0000000000 --- a/test_common/harness/threadTesting.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "compat.h" -#include "threadTesting.h" -#include "errorHelpers.h" -#include -#include - -#if !defined(_WIN32) -#include -#endif - -#if 0 // Disabed for now - -typedef struct -{ - basefn mFunction; - cl_device_id mDevice; - cl_context mContext; - int mNumElements; -} TestFnArgs; - -//////////////////////////////////////////////////////////////////////////////// -// Thread-based testing. Spawns a new thread to run the given test function, -// then waits for it to complete. 
The entire idea is that, if the thread crashes, -// we can catch it and report it as a failure instead of crashing the entire suite -//////////////////////////////////////////////////////////////////////////////// - -void *test_thread_wrapper( void *data ) -{ - TestFnArgs *args; - int retVal; - cl_context context; - - args = (TestFnArgs *)data; - - /* Create a new context to use (contexts can't cross threads) */ - context = clCreateContext(NULL, args->mDeviceGroup); - if( context == NULL ) - { - log_error("clCreateContext failed for new thread\n"); - return (void *)(-1); - } - - /* Call function */ - retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); - - clReleaseContext( context ); - - return (void *)retVal; -} - -int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) -{ - int error; - pthread_t threadHdl; - void *retVal; - TestFnArgs args; - - - args.mFunction = fnToTest; - args.mDeviceGroup = deviceGroup; - args.mDevice = device; - args.mContext = context; - args.mNumElements = numElements; - - - error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); - if( error != 0 ) - { - log_error( "ERROR: Unable to create thread for testing!\n" ); - return -1; - } - - /* Thread has been started, now just wait for it to complete (or crash) */ - error = pthread_join( threadHdl, &retVal ); - if( error != 0 ) - { - log_error( "ERROR: Unable to join testing thread!\n" ); - return -1; - } - - return (int)((intptr_t)retVal); -} -#endif diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h index 91ff279f65..2f3c1873c7 100644 --- a/test_common/harness/threadTesting.h +++ b/test_common/harness/threadTesting.h @@ -22,12 +22,7 @@ #include #endif -#define TEST_SKIPPED_ITSELF -100 - typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int 
test_threaded_function(basefn fnToTest, cl_device_id device, - cl_context context, cl_command_queue queue, - int numElements); -#endif // _threadTesting_h +#endif // _threadTesting_h \ No newline at end of file diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h index 9a58a9d2b8..50c7c9387f 100644 --- a/test_common/harness/typeWrappers.h +++ b/test_common/harness/typeWrappers.h @@ -16,122 +16,134 @@ #ifndef _typeWrappers_h #define _typeWrappers_h -#include -#include - #if !defined(_WIN32) #include #endif #include "compat.h" -#include #include "mt19937.h" #include "errorHelpers.h" #include "kernelHelpers.h" -/* cl_context wrapper */ +#include +#include -class clContextWrapper { -public: - clContextWrapper() { mContext = NULL; } - clContextWrapper(cl_context program) { mContext = program; } - ~clContextWrapper() - { - if (mContext != NULL) clReleaseContext(mContext); - } +namespace wrapper_details { + +// clRetain*() and clRelease*() functions share the same type. +template // T should be cl_context, cl_program, ... +using RetainReleaseType = cl_int CL_API_CALL(T); - clContextWrapper &operator=(const cl_context &rhs) +// A generic wrapper class that follows OpenCL retain/release semantics. +// +// This Wrapper class implement copy and move semantics, which makes it +// compatible with standard containers for example. +// +// Template parameters: +// - T is the cl_* type (e.g. cl_context, cl_program, ...) +// - Retain is the clRetain* function (e.g. clRetainContext, ...) +// - Release is the clRelease* function (e.g. clReleaseContext, ...) 
+template Retain, RetainReleaseType Release> +class Wrapper { + static_assert(std::is_pointer::value, "T should be a pointer type."); + T object = nullptr; + + void retain() { - mContext = rhs; - return *this; + if (!object) return; + + auto err = Retain(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRetain*() failed"); + std::abort(); + } } - operator cl_context() const { return mContext; } - cl_context *operator&() { return &mContext; } + void release() + { + if (!object) return; - bool operator==(const cl_context &rhs) { return mContext == rhs; } + auto err = Release(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRelease*() failed"); + std::abort(); + } + } -protected: - cl_context mContext; -}; +public: + Wrapper() = default; -/* cl_program wrapper */ + // On initialisation, assume the object has a refcount of one. + Wrapper(T object): object(object) {} -class clProgramWrapper { -public: - clProgramWrapper() { mProgram = NULL; } - clProgramWrapper(cl_program program) { mProgram = program; } - ~clProgramWrapper() + // On assignment, assume the object has a refcount of one. + Wrapper &operator=(T rhs) { - if (mProgram != NULL) clReleaseProgram(mProgram); + reset(rhs); + return *this; } - clProgramWrapper &operator=(const cl_program &rhs) + // Copy semantics, increase retain count. + Wrapper(Wrapper const &w) { *this = w; } + Wrapper &operator=(Wrapper const &w) { - mProgram = rhs; + reset(w.object); + retain(); return *this; } - operator cl_program() const { return mProgram; } - - cl_program *operator&() { return &mProgram; } - bool operator==(const cl_program &rhs) { return mProgram == rhs; } - -protected: - cl_program mProgram; -}; - -/* cl_kernel wrapper */ - -class clKernelWrapper { -public: - clKernelWrapper() { mKernel = NULL; } - clKernelWrapper(cl_kernel kernel) { mKernel = kernel; } - ~clKernelWrapper() + // Move semantics, directly take ownership. 
+ Wrapper(Wrapper &&w) { *this = std::move(w); } + Wrapper &operator=(Wrapper &&w) { - if (mKernel != NULL) clReleaseKernel(mKernel); + reset(w.object); + w.object = nullptr; + return *this; } - clKernelWrapper &operator=(const cl_kernel &rhs) + ~Wrapper() { reset(); } + + // Release the existing object, if any, and own the new one, if any. + void reset(T new_object = nullptr) { - mKernel = rhs; - return *this; + release(); + object = new_object; } - operator cl_kernel() const { return mKernel; } - cl_kernel *operator&() { return &mKernel; } + operator T() const { return object; } - bool operator==(const cl_kernel &rhs) { return mKernel == rhs; } - -protected: - cl_kernel mKernel; + // Ideally this function should not exist as it breaks encapsulation by + // allowing external mutation of the Wrapper internal state. However, too + // much code currently relies on this. For example, instead of using T* as + // output parameters, existing code can be updated to use Wrapper& instead. + T *operator&() { return &object; } }; -/* cl_mem (stream) wrapper */ +} // namespace wrapper_details -class clMemWrapper { -public: - clMemWrapper() { mMem = NULL; } - clMemWrapper(cl_mem mem) { mMem = mem; } - ~clMemWrapper() - { - if (mMem != NULL) clReleaseMemObject(mMem); - } +using clContextWrapper = + wrapper_details::Wrapper; - clMemWrapper &operator=(const cl_mem &rhs) - { - mMem = rhs; - return *this; - } - operator cl_mem() const { return mMem; } +using clProgramWrapper = + wrapper_details::Wrapper; - cl_mem *operator&() { return &mMem; } +using clKernelWrapper = + wrapper_details::Wrapper; - bool operator==(const cl_mem &rhs) { return mMem == rhs; } +using clMemWrapper = + wrapper_details::Wrapper; -protected: - cl_mem mMem; -}; +using clCommandQueueWrapper = + wrapper_details::Wrapper; + +using clSamplerWrapper = + wrapper_details::Wrapper; + +using clEventWrapper = + wrapper_details::Wrapper; class clProtectedImage { public: @@ -183,92 +195,12 @@ class clProtectedImage { 
cl_mem *operator&() { return ℑ } - bool operator==(const cl_mem &rhs) { return image == rhs; } - protected: void *backingStore; size_t backingStoreSize; cl_mem image; }; -/* cl_command_queue wrapper */ -class clCommandQueueWrapper { -public: - clCommandQueueWrapper() { mMem = NULL; } - clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; } - ~clCommandQueueWrapper() - { - if (mMem != NULL) - { - clReleaseCommandQueue(mMem); - } - } - - clCommandQueueWrapper &operator=(const cl_command_queue &rhs) - { - mMem = rhs; - return *this; - } - operator cl_command_queue() const { return mMem; } - - cl_command_queue *operator&() { return &mMem; } - - bool operator==(const cl_command_queue &rhs) { return mMem == rhs; } - -protected: - cl_command_queue mMem; -}; - -/* cl_sampler wrapper */ -class clSamplerWrapper { -public: - clSamplerWrapper() { mMem = NULL; } - clSamplerWrapper(cl_sampler mem) { mMem = mem; } - ~clSamplerWrapper() - { - if (mMem != NULL) clReleaseSampler(mMem); - } - - clSamplerWrapper &operator=(const cl_sampler &rhs) - { - mMem = rhs; - return *this; - } - operator cl_sampler() const { return mMem; } - - cl_sampler *operator&() { return &mMem; } - - bool operator==(const cl_sampler &rhs) { return mMem == rhs; } - -protected: - cl_sampler mMem; -}; - -/* cl_event wrapper */ -class clEventWrapper { -public: - clEventWrapper() { mMem = NULL; } - clEventWrapper(cl_event mem) { mMem = mem; } - ~clEventWrapper() - { - if (mMem != NULL) clReleaseEvent(mMem); - } - - clEventWrapper &operator=(const cl_event &rhs) - { - mMem = rhs; - return *this; - } - operator cl_event() const { return mMem; } - - cl_event *operator&() { return &mMem; } - - bool operator==(const cl_event &rhs) { return mMem == rhs; } - -protected: - cl_event mMem; -}; - /* Generic protected memory buffer, for verifying access within bounds */ class clProtectedArray { public: diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt index 6714e234be..363ece8698 100644 --- 
a/test_conformance/CMakeLists.txt +++ b/test_conformance/CMakeLists.txt @@ -21,6 +21,7 @@ if(D3D11_IS_SUPPORTED) endif(D3D11_IS_SUPPORTED) add_subdirectory( device_partition ) add_subdirectory( events ) +add_subdirectory( extensions ) add_subdirectory( geometrics ) if(GL_IS_SUPPORTED) add_subdirectory( gl ) @@ -49,15 +50,15 @@ add_subdirectory( subgroups ) add_subdirectory( workgroups ) add_subdirectory( pipes ) add_subdirectory( device_timer ) -if(KHRONOS_OFFLINE_COMPILER) - add_subdirectory( clcpp ) -endif() add_subdirectory( spirv_new ) add_subdirectory( spir ) file(GLOB CSV_FILES "opencl_conformance_tests_*.csv") -set(PY_FILES run_conformance.py) +set(PY_FILES + generate_spirv_offline.py + run_conformance.py +) # Copy .csv files foreach(FILE ${CSV_FILES}) diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp index 0a052f0503..56fb24f1a1 100644 --- a/test_conformance/SVM/main.cpp +++ b/test_conformance/SVM/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -213,14 +213,15 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt return -1; } bool extensions_supported = true; - for (auto extension : extensions_list) + for (auto extension : extensions_list) { - if (!is_extension_available(devices[i], extension.c_str())) - { - log_error("Required extension not found - device id %d - %s\n", i, extension.c_str()); - extensions_supported = false; - break; - } + if (!is_extension_available(devices[i], extension.c_str())) + { + log_error("Required extension not found - device id %d - %s\n", i, + extension.c_str()); + extensions_supported = false; + break; + } } if((caps & required_svm_caps) == required_svm_caps && extensions_supported) { @@ -249,10 +250,11 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt test_error(error, "clCreateCommandQueue failed"); } - if(ppCodeString) + if (ppCodeString) { - error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0"); - test_error( error, "failed to create program" ); + error = + create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0); + test_error(error, "failed to create program"); } return 0; diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp index 42ea0bd2ad..b28db41190 100644 --- a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp +++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -16,27 +16,33 @@ #include "common.h" static char hash_table_kernel[] = - "#if 0\n" - "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" - "#endif\n" - "typedef struct BinNode {\n" - " int value;\n" - " atomic_uintptr_t pNext;\n" - "} BinNode;\n" - - "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n" - "{\n" - " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n" - " uint i = get_global_id(0);\n" - " uint b = input[i] % numBins;\n" - " pNew->value = input[i];\n" - " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n" - " do\n" - " {\n" - " atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list - " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n" - "}\n"; + "#if 0\n" + "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" + "#endif\n" + "typedef struct BinNode {\n" + " int value;\n" + " atomic_uintptr_t pNext;\n" + "} BinNode;\n" + + "__kernel void build_hash_table(__global uint* input, __global BinNode* " + "pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n" + "{\n" + " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, " + "1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n" + " uint i = get_global_id(0);\n" + " uint b = input[i] % numBins;\n" + " pNew->value = input[i];\n" + " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), " + "memory_order_seq_cst, 
memory_scope_all_svm_devices);\n" + " do\n" + " {\n" + " atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, " + "memory_scope_all_svm_devices);\n" // always inserting at head of list + " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), " + "&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, " + "memory_scope_all_svm_devices));\n" + "}\n"; typedef struct BinNode{ cl_uint value; diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp index 4cc34952b3..0b94cbf2f5 100644 --- a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp +++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,15 +17,19 @@ const char *find_targets_kernel[] = { - "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " uint index;\n" - " if(image[i] == target) {\n" - " index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n" - " atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n" - " }\n" - "}\n" + "__kernel void find_targets(__global uint* image, uint target, volatile " + "__global atomic_uint *numTargetsFound, volatile __global atomic_uint " + "*targetLocations)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " uint index;\n" + " if(image[i] == target) {\n" + " index = atomic_fetch_add_explicit(numTargetsFound, 1u, " + "memory_order_relaxed, memory_scope_device); \n" + " atomic_exchange_explicit(&targetLocations[index], i, " + "memory_order_relaxed, 
memory_scope_all_svm_devices); \n" + " }\n" + "}\n" }; diff --git a/test_conformance/allocations/allocation_fill.cpp b/test_conformance/allocations/allocation_fill.cpp index a75589427b..b4ea379864 100644 --- a/test_conformance/allocations/allocation_fill.cpp +++ b/test_conformance/allocations/allocation_fill.cpp @@ -200,8 +200,10 @@ int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_ result = clFinish(*queue); if (result != SUCCEEDED) { - print_error(error, "clFinish failed after successful enquing filling buffer with data."); - return result; + print_error(error, + "clFinish failed after successful enqueuing filling " + "buffer with data."); + return result; } } else { error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event); diff --git a/test_conformance/allocations/allocation_functions.cpp b/test_conformance/allocations/allocation_functions.cpp index 7182c7271e..827ee1042d 100644 --- a/test_conformance/allocations/allocation_functions.cpp +++ b/test_conformance/allocations/allocation_functions.cpp @@ -37,8 +37,8 @@ int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t } if (size_to_allocate == 0) { - log_error("Trying to allcoate a zero sized image.\n"); - return FAILED_ABORT; + log_error("Trying to allocate a zero sized image.\n"); + return FAILED_ABORT; } error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL ); diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 0dec4c6dd7..43e81277ed 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -112,6 +112,8 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All int number_of_mems_used; cl_ulong max_individual_allocation_size = g_max_individual_allocation_size; cl_ulong global_mem_size = g_global_mem_size ; + const bool allocate_image = + (alloc_type != 
BUFFER) && (alloc_type != BUFFER_NON_BLOCKING); static const char* alloc_description[] = { "buffer(s)", @@ -123,7 +125,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All }; // Skip image tests if we don't support images on the device - if( alloc_type > BUFFER && checkForImageSupport( device ) ) + if (allocate_image && checkForImageSupport(device)) { log_info( "Can not test image allocation because device does not support images.\n" ); return 0; @@ -132,7 +134,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All // This section was added in order to fix a bug in the test // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT // The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image - if( ( alloc_type != BUFFER ) && ( alloc_type != BUFFER_NON_BLOCKING ) ) + if (allocate_image) { size_t max_width, max_height; diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt index eedf6b490d..d3e6c6a7c4 100644 --- a/test_conformance/api/CMakeLists.txt +++ b/test_conformance/api/CMakeLists.txt @@ -2,18 +2,19 @@ set(MODULE_NAME API) set(${MODULE_NAME}_SOURCES main.cpp + negative_platform.cpp test_api_consistency.cpp test_bool.cpp test_retain.cpp test_retain_program.cpp test_queries.cpp - test_queries_compatibility.cpp test_create_kernels.cpp test_kernels.cpp test_kernel_private_memory_size.cpp test_api_min_max.cpp test_kernel_arg_changes.cpp test_kernel_arg_multi_setup.cpp + test_kernel_attributes.cpp test_binary.cpp test_native_kernel.cpp test_mem_objects.cpp @@ -21,7 +22,6 @@ set(${MODULE_NAME}_SOURCES test_device_min_data_type_align_size_alignment.cpp test_platform.cpp test_kernel_arg_info.cpp - test_kernel_arg_info_compatibility.cpp test_null_buffer_arg.cpp test_mem_object_info.cpp test_min_image_formats.cpp diff --git 
a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp index 10e2b57eba..fa76a4064f 100644 --- a/test_conformance/api/main.cpp +++ b/test_conformance/api/main.cpp @@ -51,7 +51,6 @@ test_definition test_list[] = { ADD_TEST(load_two_kernels_manually), ADD_TEST(get_program_info_kernel_names), ADD_TEST(get_kernel_arg_info), - ADD_TEST(get_kernel_arg_info_compatibility), ADD_TEST(create_kernels_in_program), ADD_TEST(get_kernel_info), ADD_TEST(kernel_private_memory_size), @@ -60,6 +59,7 @@ test_definition test_list[] = { ADD_TEST(set_kernel_arg_constant), ADD_TEST(set_kernel_arg_struct_array), ADD_TEST(kernel_global_constant), + ADD_TEST(kernel_attributes), ADD_TEST(min_max_thread_dimensions), ADD_TEST(min_max_work_items_sizes), @@ -146,6 +146,8 @@ test_definition test_list[] = { ADD_TEST_VERSION(consistency_3d_image_writes, Version(3, 0)), ADD_TEST(min_image_formats), + ADD_TEST(negative_get_platform_info), + ADD_TEST(negative_get_platform_ids), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp new file mode 100644 index 0000000000..861d47484e --- /dev/null +++ b/test_conformance/api/negative_platform.cpp @@ -0,0 +1,65 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "testBase.h" + +int test_negative_get_platform_ids(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_platform_id platform; + cl_int err = clGetPlatformIDs(0, &platform, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetPlatformIDs should return CL_INVALID_VALUE when: \"num_entries " + "is equal to zero and platforms is not NULL\"", + TEST_FAIL); + + err = clGetPlatformIDs(1, nullptr, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetPlatformIDs should return CL_INVALID_VALUE when: \"both " + "num_platforms and platforms are NULL\"", + TEST_FAIL); + + return TEST_PASS; +} + +int test_negative_get_platform_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_platform_id platform = getPlatformFromDevice(deviceID); + + constexpr cl_platform_info INVALID_PARAM_VALUE = 0; + cl_int err = + clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name " + "is not one of the supported values\"", + TEST_FAIL); + + char* version; + err = + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, &version, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetPlatformInfo should return CL_INVALID_VALUE when: \"size in " + "bytes specified by param_value_size is < size of return type and " + "param_value is not a NULL value\"", + TEST_FAIL); + + return TEST_PASS; +} diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h index 0dcc9a6901..1bcb311626 100644 --- a/test_conformance/api/procs.h +++ b/test_conformance/api/procs.h @@ -119,7 +119,6 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int 
test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); extern int test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); @@ -195,3 +194,13 @@ extern int test_consistency_3d_image_writes(cl_device_id deviceID, extern int test_min_image_formats(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_negative_get_platform_info(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_negative_get_platform_ids(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_kernel_attributes(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 9ac4aae334..28ca823776 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -24,7 +24,8 @@ const char *sample_single_param_kernel[] = { "{\n" " int tid = get_global_id(0);\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_single_param_write_kernel[] = { "__kernel void sample_test(__global int *src)\n" @@ -32,23 +33,29 @@ const char *sample_single_param_write_kernel[] = { " int tid = get_global_id(0);\n" " src[tid] = tid;\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_read_image_kernel_pattern[] = { - "__kernel void sample_test( __global float *result, ", " )\n" + "__kernel void sample_test( __global float *result, ", + " )\n" "{\n" - " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" + " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " + "CLK_FILTER_NEAREST;\n" " int tid = get_global_id(0);\n" " result[0] = 0.0f;\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_write_image_kernel_pattern[] = { - "__kernel void sample_test( ", " )\n" + "__kernel void sample_test( ", + " )\n" "{\n" " int tid = get_global_id(0);\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_large_parmam_kernel_pattern[] = { @@ -57,7 +64,8 @@ const char *sample_large_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_large_int_parmam_kernel_pattern[] = { "__kernel void sample_test(%s, __global int *result)\n" @@ -65,15 +73,19 @@ const char *sample_large_int_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_sampler_kernel_pattern[] = { - "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n" + "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", + ", sampler_t sampler%d", + ")\n" "{\n" " int tid = get_global_id(0);\n", " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_const_arg_kernel[] = { "__kernel void sample_test(__constant int 
*src1, __global int *dst)\n" @@ -82,10 +94,12 @@ const char *sample_const_arg_kernel[] = { "\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_local_arg_kernel[] = { - "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n" + "__kernel void sample_test(__local int *src1, __global int *global_src, " + "__global int *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" @@ -93,19 +107,21 @@ const char *sample_local_arg_kernel[] = { " barrier(CLK_GLOBAL_MEM_FENCE);\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_const_max_arg_kernel_pattern = -"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src1[tid];\n" -"%s" -"\n" -"}\n"; - -int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) + "__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src1[tid];\n" + "%s" + "\n" + "}\n"; + +int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal; unsigned int maxThreadDim, threadDim, i; @@ -118,19 +134,24 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Get the max thread dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxThreadDim), &maxThreadDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); - if( maxThreadDim < 3 ) + if (maxThreadDim < 3) { - log_error( "ERROR: Reported max work item dimensions is less than required! 
(%d)\n", maxThreadDim ); + log_error("ERROR: Reported max work item dimensions is less than " + "required! (%d)\n", + maxThreadDim); return -1; } log_info("Reported max thread dimensions of %d.\n", maxThreadDim); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_single_param_kernel, "sample_test") + != 0) { return -1; } @@ -138,105 +159,122 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Create some I/O streams */ streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 100, NULL, &error); - if( streams[0] == NULL ) + if (streams[0] == NULL) { log_error("ERROR: Creating test array failed!\n"); return -1; } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); retVal = 0; /* Now try running the kernel with up to that many threads */ - for (threadDim=1; threadDim <= maxThreadDim; threadDim++) + for (threadDim = 1; threadDim <= maxThreadDim; threadDim++) { - threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - for( i = 0; i < maxThreadDim; i++ ) + threads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + localThreads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + for (i = 0; i < maxThreadDim; i++) { - threads[ i ] = 1; + threads[i] = 1; localThreads[i] = 1; } - error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "Failed clEnqueueNDRangeKernel"); + error = clEnqueueNDRangeKernel(queue, kernel, maxThreadDim, NULL, + threads, localThreads, 0, NULL, &event); 
+ test_error(error, "Failed clEnqueueNDRangeKernel"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); /* All done */ - free( threads ); - free( localThreads ); + free(threads); + free(localThreads); } return retVal; } -int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t *deviceMaxWorkItemSize; unsigned int maxWorkItemDim; /* Get the max work item dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); - - log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim); - deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL ); - test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxWorkItemDim), &maxWorkItemDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); + + 
log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", + maxWorkItemDim); + deviceMaxWorkItemSize = (size_t *)malloc(sizeof(size_t) * maxWorkItemDim); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * maxWorkItemDim, + deviceMaxWorkItemSize, NULL); + test_error(error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed"); unsigned int i; int errors = 0; - for(i=0; i= 128 && maxParameterSize == 1024) { - error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ); - test_error( error, "Unable to get device type from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(deviceType), + &deviceType, NULL); + test_error(error, "Unable to get device type from device"); - if(deviceType != CL_DEVICE_TYPE_CUSTOM) + if (deviceType != CL_DEVICE_TYPE_CUSTOM) { maxReadImages = 127; } @@ -295,85 +340,107 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c maxParameterSize -= deviceAddressSize; // Calculate the number we can use - if (maxParameterSize/deviceAddressSize < maxReadImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize)); - maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize); + if (maxParameterSize / deviceAddressSize < maxReadImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / deviceAddressSize)); + maxReadImages = (unsigned int)(maxParameterSize / deviceAddressSize); } /* Create a program with that many read args */ - programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) + - strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240); + programSrc = (char *)malloc(strlen(sample_read_image_kernel_pattern[0]) + + 
(strlen(readArgPattern) + 6) * (maxReadImages) + + strlen(sample_read_image_kernel_pattern[1]) + + 1 + 40240); - strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "read_only image2d_t srcimg0" ); - for( i = 0; i < maxReadImages-1; i++ ) + strcpy(programSrc, sample_read_image_kernel_pattern[0]); + strcat(programSrc, "read_only image2d_t srcimg0"); + for (i = 0; i < maxReadImages - 1; i++) { - sprintf( readArgLine, readArgPattern, i+1 ); - strcat( programSrc, readArgLine ); + sprintf(readArgLine, readArgPattern, i + 1); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxReadImages; i++) { - sprintf( readArgLine, "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i); - strcat( programSrc, readArgLine ); + strcat(programSrc, sample_read_image_kernel_pattern[1]); + for (i = 0; i < maxReadImages; i++) + { + sprintf( + readArgLine, + "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", + i); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_read_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL, &error); - test_error( error, "clCreateBufer failed"); + test_error(error, "clCreateBufer failed"); /* Create some I/O streams */ streams = new clMemWrapper[maxReadImages + 1]; - for( i = 0; i < maxReadImages; i++ ) + for (i = 0; i < maxReadImages; i++) { - image_data[0]=i; - image_result+= image_data[0]; - streams[i] = 
create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error ); - test_error( error, "Unable to allocate test image" ); + image_data[0] = i; + image_result += image_data[0]; + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &image_format_desc, 4, 4, 0, image_data, &error); + test_error(error, "Unable to allocate test image"); } - error = clSetKernelArg( kernel, 0, sizeof( result ), &result ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(result), &result); + test_error(error, "Unable to set kernel arguments"); /* Set the arguments */ - for( i = 1; i < maxReadImages+1; i++ ) + for (i = 1; i < maxReadImages + 1; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] ); - test_error( error, "Unable to set kernel arguments" ); + error = + clSetKernelArg(kernel, i, sizeof(streams[i - 1]), &streams[i - 1]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); 
clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), + &actual_image_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); delete[] streams; - if (actual_image_result != image_result) { - log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result); + if (actual_image_result != image_result) + { + log_error("Result failed to verify. Got %g, expected %g.\n", + actual_image_result, image_result); return 1; } return 0; } -int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; unsigned int maxWriteImages, i; @@ -381,94 +448,117 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ char writeArgLine[128], *programSrc; const char *writeArgPattern = ", write_only image2d_t dstimg%d"; clKernelWrapper kernel; - clMemWrapper *streams; + clMemWrapper *streams; size_t threads[2]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; size_t maxParameterSize; cl_event event; cl_int event_status; cl_uint minRequiredWriteImages = gIsEmbedded ? 
1 : 8; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) image_format_desc.image_channel_order = CL_RGBA; image_format_desc.image_channel_data_type = CL_UNORM_INT8; /* Get the max read image arg count */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL ); - test_error( error, "Unable to get max write image arg count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, + sizeof(maxWriteImages), &maxWriteImages, NULL); + test_error(error, "Unable to get max write image arg count from device"); - if( maxWriteImages == 0 ) + if (maxWriteImages == 0) { - log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" ); + log_info( + "WARNING: Device reports 0 for a max write image arg count (write " + "image arguments unsupported). Skipping test (implicitly passes). " + "This is only valid if the number of image formats is also 0.\n"); return 0; } - if( maxWriteImages < minRequiredWriteImages ) + if (maxWriteImages < minRequiredWriteImages) { - log_error( "ERROR: Reported max write image arg count is less than required! (%d)\n", maxWriteImages ); + log_error("ERROR: Reported max write image arg count is less than " + "required! 
(%d)\n", + maxWriteImages); return -1; } log_info("Reported %d max write image args.\n", maxWriteImages); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxWriteImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxWriteImages = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } /* Create a program with that many write args + 1 */ - programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) + - strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 ); + programSrc = (char *)malloc( + strlen(sample_write_image_kernel_pattern[0]) + + (strlen(writeArgPattern) + 6) * (maxWriteImages + 1) + + strlen(sample_write_image_kernel_pattern[1]) + 1 + 40240); - strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "write_only image2d_t dstimg0" ); - for( i = 1; i < maxWriteImages; i++ ) + strcpy(programSrc, sample_write_image_kernel_pattern[0]); + strcat(programSrc, "write_only image2d_t dstimg0"); + for (i = 1; i < maxWriteImages; i++) { - sprintf( writeArgLine, writeArgPattern, i ); - strcat( programSrc, writeArgLine ); + sprintf(writeArgLine, 
writeArgPattern, i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxWriteImages; i++) { - sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i); - strcat( programSrc, writeArgLine ); + strcat(programSrc, sample_write_image_kernel_pattern[1]); + for (i = 0; i < maxWriteImages; i++) + { + sprintf(writeArgLine, + "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", + i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_write_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); /* Create some I/O streams */ streams = new clMemWrapper[maxWriteImages + 1]; - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to allocate test image" ); + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE, &image_format_desc, 16, + 16, 0, NULL, &error); + test_error(error, "Unable to allocate test image"); } /* Set the arguments */ - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 16; - error = 
clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed."); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); @@ -478,7 +568,8 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ return 0; } -int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_ulong maxAllocSize, memSize, minSizeToTry; @@ -492,61 +583,89 @@ int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_co requiredAllocSize = 128 * 1024 * 1024; /* Get the max mem alloc size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get max mem alloc size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get max mem alloc size from device"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, 
sizeof( memSize ), &memSize, NULL ); - test_error( error, "Unable to get global memory size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(memSize), &memSize, NULL); + test_error(error, "Unable to get global memory size from device"); - if (memSize > (cl_ulong)SIZE_MAX) { - memSize = (cl_ulong)SIZE_MAX; + if (memSize > (cl_ulong)SIZE_MAX) + { + memSize = (cl_ulong)SIZE_MAX; } - if( maxAllocSize < requiredAllocSize) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required %lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + log_error("ERROR: Reported max allocation size is less than required " + "%lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", + (requiredAllocSize / 1024) / 1024, maxAllocSize, + (maxAllocSize / 1024) / 1024, (memSize / 1024) / 1024); return -1; } - requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4; + requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) + ? 1024 * 1024 * 1024 + : memSize / 4; if (gIsEmbedded) - requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) + ? 1 * 1024 * 1024 + : requiredAllocSize; else - requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) + ? 128 * 1024 * 1024 + : requiredAllocSize; - if( maxAllocSize < requiredAllocSize ) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required of total memory! 
(%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 ); + log_error( + "ERROR: Reported max allocation size is less than required of " + "total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", + maxAllocSize, (maxAllocSize / 1024) / 1024, + (requiredAllocSize / 1024) / 1024); return -1; } - log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n", - maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0)); + log_info("Reported max allocation size of %lld bytes (%gMB) and global mem " + "size of %lld bytes (%gMB).\n", + maxAllocSize, maxAllocSize / (1024.0 * 1024.0), requiredAllocSize, + requiredAllocSize / (1024.0 * 1024.0)); - if ( memSize < maxAllocSize ) { - log_info("Global memory size is less than max allocation size, using that.\n"); + if (memSize < maxAllocSize) + { + log_info("Global memory size is less than max allocation size, using " + "that.\n"); maxAllocSize = memSize; } - minSizeToTry = maxAllocSize/16; - while (maxAllocSize > (maxAllocSize/4)) { + minSizeToTry = maxAllocSize / 16; + while (maxAllocSize > (maxAllocSize / 4)) + { - log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); - memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error ); - if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) { - log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); + memHdl = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, + NULL, &error); + if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE + || error == CL_OUT_OF_RESOURCES || 
error == CL_OUT_OF_HOST_MEMORY) + { + log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); maxAllocSize -= minSizeToTry; continue; } - test_error( error, "clCreateBuffer failed for maximum sized buffer."); + test_error(error, "clCreateBuffer failed for maximum sized buffer."); return 0; } - log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, + (double)maxAllocSize / (1024.0 * 1024.0)); return -1; } -int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -557,7 +676,7 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if (version == Version(1, 0)) @@ -571,16 +690,20 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" 
); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d width from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -588,34 +711,42 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum width" ); + print_error(error, "Image 2D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -625,7 +756,7 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c cl_uint minRequiredDimension; size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if 
(version == Version(1, 0)) @@ -638,16 +769,20 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c } /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d height from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -655,56 +790,67 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum height" ); + print_error(error, "Image 2D creation failed for maximum height"); return -1; } return 0; } -int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper 
streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d width from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -712,56 +858,68 @@ int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum width" ); + print_error(error, "Image 3D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d height from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -769,27 +927,35 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum height" ); + print_error(error, "Image 3D creation failed for maximum height"); return -1; } @@ -797,29 +963,33 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c } -int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_depth(cl_device_id 
deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d depth from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d depth from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d depth is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported depth is %ld.\n", maxDimension); @@ -827,55 +997,67 @@ int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum depth" ); + print_error(error, "Image 3D creation failed for maximum depth"); return -1; } return 0; } -int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 256 : 2048; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D_ARRAY, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image array size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image array size from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image array size is less than required! (%d)\n", (int)maxDimension ); + log_error("ERROR: Reported max image array size is less than required! 
" + "(%d)\n", + (int)maxDimension); return -1; } log_info("Max reported image array size is %ld.\n", maxDimension); @@ -883,96 +1065,127 @@ int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_ /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D_ARRAY, + &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( 
streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = + create_image_2d_array(context, CL_MEM_READ_ONLY, &image_format_desc, 1, + 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "2D Image Array creation failed for maximum array size" ); + print_error(error, + "2D Image Array creation failed for maximum array size"); return -1; } return 0; } -int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimensionPixels; clMemWrapper streams[2]; - cl_image_format image_format_desc = {0}; + cl_image_format image_format_desc = { 0 }; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536; unsigned int i = 0; size_t pixelBytes = 0; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Get the max memory allocation size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL ); - test_error( error, "Unable to get max image buffer size from device" ); + error = + clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + sizeof(maxDimensionPixels), &maxDimensionPixels, NULL); + test_error(error, "Unable to get max image buffer size from device"); - if( maxDimensionPixels < minRequiredDimension ) + if (maxDimensionPixels < minRequiredDimension) { - log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels ); + log_error("ERROR: Reported max image buffer size is less than " + "required! (%d)\n", + (int)maxDimensionPixels); return -1; } - log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels); + log_info("Max reported image buffer size is %ld pixels.\n", + maxDimensionPixels); pixelBytes = maxAllocSize / maxDimensionPixels; - if ( pixelBytes == 0 ) + if (pixelBytes == 0) { - log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" ); + log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than " + "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image " + "of maximum size!\n"); return -1; } error = -1; - for ( i = pixelBytes; i > 0; --i ) + for (i = pixelBytes; i > 0; --i) { - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc ); - if ( error == CL_SUCCESS ) + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE1D, + CL_MEM_READ_ONLY, i, &image_format_desc); + if (error == CL_SUCCESS) { pixelBytes = i; break; } } - test_error( error, "Device does not support format 
to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" ); + test_error(error, + "Device does not support format to be used to allocate image of " + "CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n"); - log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n", - GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0)); + log_info("Attempting to create an 1D image with channel order %s from " + "buffer of size %d = %gMB.\n", + GetChannelOrderName(image_format_desc.image_channel_order), + (int)maxDimensionPixels, + ((float)maxDimensionPixels * pixelBytes / 1024.0 / 1024.0)); /* Try to allocate a buffer */ - streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, + maxDimensionPixels * pixelBytes, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Buffer creation failed for maximum image buffer size" ); + print_error(error, + "Buffer creation failed for maximum image buffer size"); return -1; } /* Try to allocate a 1D image array from buffer */ - streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) - { - print_error( error, "1D Image from buffer creation failed for maximum image buffer size" ); + streams[1] = + create_image_1d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimensionPixels, 0, NULL, streams[0], &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) + { + print_error(error, + "1D Image from buffer creation failed for maximum image " + "buffer size"); return -1; } @@ -980,8 +1193,8 @@ int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl } - -int 
test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal, i; size_t maxSize; @@ -1000,62 +1213,78 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co /* Get the max param size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxSize), &maxSize, NULL); + test_error(error, "Unable to get max parameter size from device"); - if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) ) + if (((!gIsEmbedded) && (maxSize < 1024)) + || ((gIsEmbedded) && (maxSize < 256))) { - log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize ); + log_error( + "ERROR: Reported max parameter size is less than required! 
(%d)\n", + (int)maxSize); return -1; } /* The embedded profile without cles_khr_int64 extension does not require * longs, so use ints */ if (embeddedNoLong) - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_int); else - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_long); - decrement = (size_t)(numberOfIntParametersToTry/8); - if (decrement < 1) - decrement = 1; + decrement = (size_t)(numberOfIntParametersToTry / 8); + if (decrement < 1) decrement = 1; log_info("Reported max parameter size of %d bytes.\n", (int)maxSize); - while (numberOfIntParametersToTry > 0) { - // These need to be inside to be deallocated automatically on each loop iteration. + while (numberOfIntParametersToTry > 0) + { + // These need to be inside to be deallocated automatically on each loop + // iteration. 
clProgramWrapper program; clMemWrapper mem; clKernelWrapper kernel; if (embeddedNoLong) { - log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + log_info( + "Trying a kernel with %ld int arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_int) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_int)); } else { - log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + log_info( + "Trying a kernel with %ld long arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_long) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_long)); } // Allocate memory for the program storage - data = malloc(sizeof(cl_long)*numberOfIntParametersToTry); - - argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024)); + data = malloc(sizeof(cl_long) * numberOfIntParametersToTry); + + argumentLine = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + codeLines = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + programSrc = (char *)malloc(sizeof(char) + * (numberOfIntParametersToTry * 64 + 1024)); argumentLine[0] = '\0'; codeLines[0] = '\0'; programSrc[0] = '\0'; // Generate our results expectedResult = 0; - for (i=0; 
i<(int)numberOfIntParametersToTry; i++) - { - if( gHasLong ) + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) { ((cl_long *)data)[i] = i; expectedResult += i; @@ -1068,30 +1297,35 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co } // Build the program - if( gHasLong) + if (gHasLong) sprintf(argumentLine, "%s", "long arg0"); else sprintf(argumentLine, "%s", "int arg0"); sprintf(codeLines, "%s", "result[0] += arg0;"); - for (i=1; i<(int)numberOfIntParametersToTry; i++) + for (i = 1; i < (int)numberOfIntParametersToTry; i++) { - if( gHasLong) - sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i); + if (gHasLong) + sprintf(argumentLine + strlen(argumentLine), ", long arg%d", i); else - sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i); + sprintf(argumentLine + strlen(argumentLine), ", int arg%d", i); - sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i); + sprintf(codeLines + strlen(codeLines), "\nresult[0] += arg%d;", i); } /* Create a kernel to test with */ - sprintf( programSrc, gHasLong ? sample_large_parmam_kernel_pattern[0]: - sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines); + sprintf(programSrc, + gHasLong ? 
sample_large_parmam_kernel_pattern[0] + : sample_large_int_parmam_kernel_pattern[0], + argumentLine, codeLines); ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&ptr, "sample_test") + != 0) { - log_info("Create program failed, decrementing number of parameters to try.\n"); + log_info("Create program failed, decrementing number of parameters " + "to try.\n"); numberOfIntParametersToTry -= decrement; continue; } @@ -1103,88 +1337,119 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co &error); test_error(error, "clCreateBuffer failed"); - for (i=0; i<(int)numberOfIntParametersToTry; i++) { - if(gHasLong) - error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i])); + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) + error = clSetKernelArg(kernel, i, sizeof(cl_long), + &(((cl_long *)data)[i])); else - error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i])); + error = clSetKernelArg(kernel, i, sizeof(cl_int), + &(((cl_int *)data)[i])); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; break; } } - if (error != CL_SUCCESS) - continue; + if (error != CL_SUCCESS) continue; error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= 
decrement; continue; } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - if (error != CL_SUCCESS) { - log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, + localDim, 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_info("clEnqueueNDRangeKernel failed (%s), decrementing number " + "of parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; continue; } // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - if(gHasLong) - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL); + if (gHasLong) + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), + &long_result, 0, NULL, NULL); else - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), + &int_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed") - free(data); + free(data); free(argumentLine); free(codeLines); free(programSrc); - if(gHasLong) + if (gHasLong) { - 
if (long_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result); + if (long_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%lld).\n", + expectedResult, long_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_long)); break; } } else { - if (int_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result); + if (int_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%d).\n", + expectedResult, int_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_int)); break; } } } - if (numberOfIntParametersToTry == (long)numberExpected) - return 0; + if (numberOfIntParametersToTry == (long)numberExpected) return 0; return -1; } -int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_samplers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint maxSamplers, i; @@ -1197,104 +1462,124 @@ int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_ cl_uint minRequiredSamplers = gIsEmbedded ? 
8 : 16; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) /* Get the max value */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL ); - test_error( error, "Unable to get max sampler count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_SAMPLERS, + sizeof(maxSamplers), &maxSamplers, NULL); + test_error(error, "Unable to get max sampler count from device"); - if( maxSamplers < minRequiredSamplers ) + if (maxSamplers < minRequiredSamplers) { - log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers ); + log_error( + "ERROR: Reported max sampler count is less than required! (%d)\n", + (int)maxSamplers); return -1; } log_info("Reported max %d samplers.\n", maxSamplers); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result - maxParameterSize -= 2*sizeof(cl_mem); + maxParameterSize -= 2 * sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler))); - maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler)); + if (maxParameterSize / sizeof(cl_sampler) < maxSamplers) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max sampler arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_sampler))); + maxSamplers = (unsigned int)(maxParameterSize / sizeof(cl_sampler)); } /* Create a kernel to test with */ - 
programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) + - strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) + - ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers + - strlen( sample_sampler_kernel_pattern[ 4 ] ) ); - strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] ); - for( i = 0; i < maxSamplers; i++ ) + programSrc = (char *)malloc( + (strlen(sample_sampler_kernel_pattern[1]) + 8) * (maxSamplers) + + strlen(sample_sampler_kernel_pattern[0]) + + strlen(sample_sampler_kernel_pattern[2]) + + (strlen(sample_sampler_kernel_pattern[3]) + 8) * maxSamplers + + strlen(sample_sampler_kernel_pattern[4])); + strcpy(programSrc, sample_sampler_kernel_pattern[0]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[1], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] ); - for( i = 0; i < maxSamplers; i++ ) + strcat(programSrc, sample_sampler_kernel_pattern[2]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[3], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] ); + strcat(programSrc, sample_sampler_kernel_pattern[4]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); // We have to set up some fake parameters so it'll work clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers]; 
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to create a test image" ); + clMemWrapper image = create_image_2d(context, CL_MEM_READ_WRITE, &format, + 16, 16, 0, NULL, &error); + test_error(error, "Unable to create a test image"); clMemWrapper stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error); - test_error( error, "Unable to create test buffer" ); + test_error(error, "Unable to create test buffer"); - error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image ); - error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream ); - test_error( error, "Unable to set kernel arguments" ); - for( i = 0; i < maxSamplers; i++ ) + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image); + error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream); + test_error(error, "Unable to set kernel arguments"); + for (i = 0; i < maxSamplers; i++) { - samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); - test_error( error, "Unable to create sampler" ); + samplers[i] = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE, + CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); - error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] ); - test_error( error, "Unable to set sampler argument" ); + error = clSetKernelArg(kernel, 2 + i, sizeof(cl_sampler), &samplers[i]); + test_error(error, "Unable to set sampler argument"); } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers."); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, + 0, NULL, &event); + test_error( + error, + 
"clEnqueueNDRangeKernel failed with maximum number of samplers."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - free( programSrc ); + free(programSrc); delete[] samplers; return 0; } #define PASSING_FRACTION 4 -int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - size_t threads[1], localThreads[1]; + size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; int i; @@ -1303,48 +1588,56 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); - if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L ) + if ((0 == gIsEmbedded && maxSize < 64L * 
1024L) || maxSize < 1L * 1024L) { - log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) ); + log_error("ERROR: Reported max constant buffer size less than required " + "by OpenCL 1.0 (reported %d KB)\n", + (int)(maxSize / 1024L)); return -1; } log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE - error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), &maxGlobalSize, 0); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - if (maxSize > maxGlobalSize / 8) - maxSize = maxGlobalSize / 8; + if (maxSize > maxGlobalSize / 8) maxSize = maxGlobalSize / 8; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, 0); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - + + if (maxSize > maxAllocSize) maxSize = maxAllocSize; + /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_const_arg_kernel, "sample_test") + != 0) { return -1; } /* Try the returned max size and decrease it until we get one that works. 
*/ - stepSize = maxSize/16; + stepSize = maxSize / 16; currentSize = maxSize; int allocPassed = 0; - d = init_genrand( gRandomSeed ); - while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) { - log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize); + d = init_genrand(gRandomSeed); + while (!allocPassed && currentSize >= maxSize / PASSING_FRACTION) + { + log_info("Attempting to allocate constant buffer of size %lld bytes\n", + maxSize); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); - constantData = (cl_int *)malloc( sizeToAllocate); + size_t sizeToAllocate = + ((size_t)currentSize / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData = (cl_int *)malloc(sizeToAllocate); if (constantData == NULL) { log_error("Failed to allocate memory for constantData!\n"); @@ -1352,53 +1645,74 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - for(i=0; i<(int)(numberOfInts); i++) + for (i = 0; i < (int)(numberOfInts); i++) constantData[i] = (int)genrand_int32(d); clMemWrapper streams[3]; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, constantData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), 
&streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int))); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - /* If we failed due to a resource issue, reduce the size and try again. */ - if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize); + log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", + (int)threads[0], (int)(threads[0] * sizeof(cl_int))); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + /* If we failed due to a resource issue, reduce the size and try again. 
+ */ + if ((error == CL_OUT_OF_RESOURCES) + || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (error == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel enqueue failed at size %lld, trying at a reduced " + "size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; } - test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed."); + test_error( + error, + "clEnqueueNDRangeKernel with maximum constant buffer size failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); - if (event_status < 0) { - if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize); + if (event_status < 0) + { + if ((event_status == CL_OUT_OF_RESOURCES) + || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (event_status == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel event indicates failure at size %lld, trying " + "at a reduced size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; - } else { + } + else + { test_error(error, "Kernel execution event returned error"); } } @@ -1415,30 +1729,41 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, 
resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeToAllocate, resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (constantData[i] != resultData[i]) { - log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n", + for (i = 0; i < (int)(numberOfInts); i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", i, constantData[i], i, resultData[i]); - free( constantData ); + free(constantData); free(resultData); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; return -1; } - free( constantData ); + free(constantData); free(resultData); } - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; - if (allocPassed) { - if (currentSize < maxSize/PASSING_FRACTION) { - log_error("Failed to allocate at least 1/8 of the reported constant size.\n"); + if (allocPassed) + { + if (currentSize < maxSize / PASSING_FRACTION) + { + log_error("Failed to allocate at least 1/8 of the reported " + "constant size.\n"); return -1; - } else if (currentSize != maxSize) { - log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); + } + else if (currentSize != maxSize) + { + log_info("Passed at reduced size. 
(%lld of %lld bytes)\n", + currentSize, maxSize); return 0; } return 0; @@ -1446,13 +1771,14 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return -1; } -int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper *streams; - size_t threads[1], localThreads[1]; + clMemWrapper *streams; + size_t threads[1], localThreads[1]; cl_uint i, maxArgs; cl_ulong maxSize; cl_ulong maxParameterSize; @@ -1465,119 +1791,145 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 ); - test_error( error, "Unable to get max constant arg count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, + sizeof(maxArgs), &maxArgs, 0); + test_error(error, "Unable to get max constant arg count"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result maxParameterSize -= sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxArgs) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxArgs) + { + 
log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxArgs = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } - if( maxArgs < (gIsEmbedded ? 4 : 8) ) + if (maxArgs < (gIsEmbedded ? 4 : 8)) { - log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs ); + log_error("ERROR: Reported max constant arg count less than required " + "by OpenCL 1.0 (reported %d)\n", + (int)maxArgs); return -1; } - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); - individualBufferSize = ((int)maxSize/2)/maxArgs; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); + individualBufferSize = (maxSize / 2) / maxArgs; - log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", - (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); + log_info( + "Reported max constant arg count of %u and max constant buffer " + "size of %llu. 
Test will attempt to allocate half of that, or %llu " + "buffers of size %zu.\n", + maxArgs, maxSize, maxArgs, individualBufferSize); - str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024); + str2 = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + constArgs = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + programSrc = (char *)malloc(sizeof(char) * 32 * 2 * (maxArgs + 2) + 1024); /* Create a test program */ constArgs[0] = 0; str2[0] = 0; - for( i = 0; i < maxArgs-1; i++ ) - { - sprintf( str, ", __constant int *src%d", (int)( i + 2 ) ); - strcat( constArgs, str ); - sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2)); - if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) { - log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i); + for (i = 0; i < maxArgs - 1; i++) + { + sprintf(str, ", __constant int *src%d", (int)(i + 2)); + strcat(constArgs, str); + sprintf(str2 + strlen(str2), "\tdst[tid] += src%d[tid];\n", + (int)(i + 2)); + if (strlen(str2) > (sizeof(char) * 32 * (maxArgs + 2) - 32) + || strlen(constArgs) > (sizeof(char) * 32 * (maxArgs + 2) - 32)) + { + log_info("Limiting number of arguments tested to %d due to test " + "program allocation size.\n", + i); break; } } - sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 ); + sprintf(programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2); /* Create a kernel to test with */ ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_test") + != 0) { return -1; } /* Create some I/O streams */ - streams = new clMemWrapper[ maxArgs + 1 ]; - for( i = 0; i < maxArgs + 1; i++ ) + streams = new 
clMemWrapper[maxArgs + 1]; + for (i = 0; i < maxArgs + 1; i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, individualBufferSize, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); } /* Set the arguments */ - for( i = 0; i < maxArgs + 1; i++ ) + for (i = 0; i < maxArgs + 1; i++) { - error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); - test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel argument"); } /* Test running the kernel and verifying it */ threads[0] = (size_t)10; - while (threads[0]*sizeof(cl_int) > individualBufferSize) - threads[0]--; + while (threads[0] * sizeof(cl_int) > individualBufferSize) threads[0]--; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo 
for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); error = clFinish(queue); - test_error( error, "clFinish failed."); + test_error(error, "clFinish failed."); - delete [] streams; + delete[] streams; free(str2); free(constArgs); free(programSrc); return 0; } -int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_compute_units(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get compute unit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(value), &value, 0); + test_error(error, "Unable to get compute unit count"); - if( value < 1 ) + if (value < 1) { - log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported compute unit count less than required by " + "OpenCL 1.0 (reported %d)\n", + (int)value); return -1; } @@ -1586,18 +1938,22 @@ int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_com return 0; } -int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_address_bits(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get address bit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(value), + &value, 0); + test_error(error, "Unable to get address bit count"); - if( value != 32 && value != 64 ) + if (value != 32 && value != 64) { - log_error( "ERROR: Reported address bit 
count not valid by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported address bit count not valid by OpenCL 1.0 " + "(reported %d)\n", + (int)value); return -1; } @@ -1606,167 +1962,200 @@ int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_comm return 0; } -int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; char profile[128] = ""; - error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device single fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device single fp config"); - //Check to see if we are an embedded profile device - if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ))) + // Check to see if we are an embedded profile device + if ((error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), + profile, NULL))) { - log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error ); + log_error("FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", + error); return error; } - if( 0 == strcmp( profile, "EMBEDDED_PROFILE" )) + if (0 == strcmp(profile, "EMBEDDED_PROFILE")) { // embedded device - if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) + if (0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) { - log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + log_error("FAILURE: embedded device supports neither " + "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n"); return -1; } } else { // Full profile - if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( 
CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN ) ) + if ((value & (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) + != (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) { - log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported single fp config doesn't meet minimum " + "set by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } } return 0; } -int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device double fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device double fp config"); - if (value == 0) - return 0; + if (value == 0) return 0; - if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) ) + if ((value + & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) + != (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) { - log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported double fp config doesn't meet minimum set " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int 
test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[3]; - size_t threads[1], localThreads[1]; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; cl_int *localData, *resultData; cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size; - cl_char buffer[ 4098 ]; - size_t length; + Version device_version; int i; int err = 0; MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max local buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(maxSize), + &maxSize, 0); + test_error(error, "Unable to get max local buffer size"); + + try + { + device_version = get_device_cl_version(deviceID); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return -1; + } - // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" - error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); - test_error( error, "Unable to get device version string" ); if (!gIsEmbedded) { - if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) - min_max_local_mem_size = 16L * 1024L; - else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) != 0 ) - min_max_local_mem_size = 16L * 1024L; - else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) != 0 ) - min_max_local_mem_size = 16L * 1024L; - else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) != 0 ) + if (device_version == Version(1, 0)) min_max_local_mem_size = 16L * 1024L; - else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) != 0 ) - min_max_local_mem_size = 32L * 1024L; else - { - log_error( "ERROR: device version string does not match required format! 
(returned: %s)\n", (char *)buffer ); - return -1; - } + min_max_local_mem_size = 32L * 1024L; + } + else + { + min_max_local_mem_size = 1L * 1024L; } - if( maxSize < (gIsEmbedded ? 1L * 1024L : min_max_local_mem_size) ) + if (maxSize < min_max_local_mem_size) { - log_error( "ERROR: Reported local mem size less than required by OpenCL 1.1 (reported %dKb)\n", (int)( maxSize / 1024L ) ); + const std::string version_as_string = device_version.to_string(); + log_error("ERROR: Reported local mem size less than required by OpenCL " + "%s (reported %d KB)\n", + version_as_string.c_str(), (int)(maxSize / 1024L)); return -1; } - log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize); + log_info("Reported max local buffer size for device: %lld bytes.\n", + maxSize); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test") + != 0) { return -1; } - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL); - test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernelLocalUsage), + &kernelLocalUsage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage); + log_info("Reported local buffer usage for kernel " + "(CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", + kernelLocalUsage); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + size_t sizeToAllocate = + ((size_t)(maxSize - kernelLocalUsage) / 
sizeof(cl_int)) + * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); - log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate); + log_info("Attempting to use %zu bytes of local memory.\n", sizeToAllocate); - localData = (cl_int *)malloc( sizeToAllocate ); - d = init_genrand( gRandomSeed ); - for(i=0; i<(int)(numberOfInts); i++) + localData = (cl_int *)malloc(sizeToAllocate); + d = init_genrand(gRandomSeed); + for (i = 0; i < (int)(numberOfInts); i++) localData[i] = (int)genrand_int32(d); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, localData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate); + log_info("Creating local buffer with %zu cl_ints 
(%zu bytes).\n", + numberOfInts, sizeToAllocate); cl_event evt; - cl_int evt_err; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt ); + cl_int evt_err; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &evt); test_error(error, "clEnqueueNDRangeKernel failed"); error = clFinish(queue); - test_error( error, "clFinish failed"); + test_error(error, "clFinish failed"); - error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL); - test_error( error, "clGetEventInfo with maximum local buffer size failed."); + error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof evt_err, &evt_err, NULL); + test_error(error, "clGetEventInfo with maximum local buffer size failed."); - if (evt_err != CL_COMPLETE) { + if (evt_err != CL_COMPLETE) + { print_error(evt_err, "Kernel event returned error"); clReleaseEvent(evt); return -1; @@ -1774,95 +2163,118 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co resultData = (cl_int *)malloc(sizeToAllocate); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (localData[i] != resultData[i]) { + for (i = 0; i < (int)(numberOfInts); i++) + if (localData[i] != resultData[i]) + { clReleaseEvent(evt); - free( localData ); + free(localData); free(resultData); log_error("Results failed to verify.\n"); return -1; } clReleaseEvent(evt); - free( localData ); + free(localData); free(resultData); return err; } -int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int 
test_min_max_kernel_preferred_work_group_size_multiple( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { - int err; + int err; clProgramWrapper program; clKernelWrapper kernel; size_t max_local_workgroup_size[3]; size_t max_workgroup_size = 0, preferred_workgroup_size = 0; - err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test"); test_error(err, "Failed to build kernel/program."); err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_workgroup_size), &max_workgroup_size, NULL); + sizeof(max_workgroup_size), + &max_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); + err = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(max_local_workgroup_size), + max_local_workgroup_size, NULL); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - // Since the preferred size is only a performance hint, we can only really check that we get a sane value - // back - log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] ); + // Since the preferred size is only a performance hint, we can only really + // check that we get a sane value back + log_info("size: %ld preferred: %ld max: %ld\n", 
max_workgroup_size, + preferred_workgroup_size, max_local_workgroup_size[0]); - if( preferred_workgroup_size > max_workgroup_size ) + if (preferred_workgroup_size > max_workgroup_size) { - log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size ); + log_error("ERROR: Reported preferred workgroup multiple larger than " + "max workgroup size (preferred %ld, max %ld)\n", + preferred_workgroup_size, max_workgroup_size); return -1; } return 0; } -int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_execution_capabilities(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_device_exec_capabilities value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get execution capabilities" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get execution capabilities"); - if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL ) + if ((value & CL_EXEC_KERNEL) != CL_EXEC_KERNEL) { - log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported execution capabilities less than required " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_command_queue_properties value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get queue 
properties" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get queue properties"); - if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE ) + if ((value & CL_QUEUE_PROFILING_ENABLE) != CL_QUEUE_PROFILING_ENABLE) { - log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported queue properties less than required by " + "OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_device_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { // Query for the device version. Version device_cl_version = get_device_cl_version(deviceID); @@ -1958,84 +2370,101 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_co return 0; } -int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_language_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; - cl_char buffer[ 4098 ]; + cl_char buffer[4098]; size_t length; // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" - error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length ); - test_error( error, "Unable to get device opencl c version string" ); - if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 ) - { - log_error( "ERROR: Initial part of device language version string does not match required format! 
(returned: \"%s\")\n", (char *)buffer ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_VERSION, + sizeof(buffer), buffer, &length); + test_error(error, "Unable to get device opencl c version string"); + if (memcmp(buffer, "OpenCL C ", strlen("OpenCL C ")) != 0) + { + log_error("ERROR: Initial part of device language version string does " + "not match required format! (returned: \"%s\")\n", + (char *)buffer); return -1; } log_info("Returned version \"%s\".\n", buffer); - char *p1 = (char *)buffer + strlen( "OpenCL C " ); - while( *p1 == ' ' ) - p1++; + char *p1 = (char *)buffer + strlen("OpenCL C "); + while (*p1 == ' ') p1++; char *p2 = p1; - if( ! isdigit(*p2) ) + if (!isdigit(*p2)) { - log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Major revision number must follow space behind " + "OpenCL C! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p2 ) ) - p2++; - if( *p2 != '.' ) + while (isdigit(*p2)) p2++; + if (*p2 != '.') { - log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: Version number must contain a decimal point! " + "(returned: %s)\n", + (char *)buffer); return -1; } char *p3 = p2 + 1; - if( ! isdigit(*p3) ) + if (!isdigit(*p3)) { - log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Minor revision number is missing or does not abut " + "the decimal point! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p3 ) ) - p3++; - if( *p3 != ' ' ) + while (isdigit(*p3)) p3++; + if (*p3 != ' ') { - log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: A space must appear after the minor version! " + "(returned: %s)\n", + (char *)buffer); return -1; } *p2 = ' '; // Put in a space for atoi below. 
p2++; - int major = atoi( p1 ); - int minor = atoi( p2 ); + int major = atoi(p1); + int minor = atoi(p2); int minor_revision = 2; - if( major * 10 + minor < 10 + minor_revision ) + if (major * 10 + minor < 10 + minor_revision) { - // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set. - if( getenv("OPENCL_1_0_DEVICE")) + // If the language version did not match, check to see if + // OPENCL_1_0_DEVICE is set. + if (getenv("OPENCL_1_0_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + log_info("WARNING: This test was run with OPENCL_1_0_DEVICE " + "defined! This is not a OpenCL 1.1 or OpenCL 1.2 " + "compatible device!!!\n"); } - else if( getenv("OPENCL_1_1_DEVICE")) + else if (getenv("OPENCL_1_1_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! This is not a OpenCL 1.2 compatible device!!!\n" ); + log_info( + "WARNING: This test was run with OPENCL_1_1_DEVICE defined! " + "This is not a OpenCL 1.2 compatible device!!!\n"); } else { - log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); - return -1; + log_error("ERROR: OpenCL device language version returned is less " + "than 1.%d! 
(Returned: %s)\n", + minor_revision, (char *)buffer); + return -1; } } // Sanity checks on the returned values - if( length != (strlen( (char *)buffer ) + 1 )) + if (length != (strlen((char *)buffer) + 1)) { - log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + log_error("ERROR: Returned length of version string does not match " + "actual length (actual: %d, returned: %d)\n", + (int)strlen((char *)buffer), (int)length); return -1; } return 0; } - diff --git a/test_conformance/api/test_clone_kernel.cpp b/test_conformance/api/test_clone_kernel.cpp index 1f2278165a..1a7e67a678 100644 --- a/test_conformance/api/test_clone_kernel.cpp +++ b/test_conformance/api/test_clone_kernel.cpp @@ -113,15 +113,16 @@ int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_c clSamplerWrapper sampler; img_format.image_channel_order = CL_RGBA; img_format.image_channel_data_type = CL_UNSIGNED_INT8; - cl_image_desc imageDesc; - memset(&imageDesc, 0x0, sizeof(cl_image_desc)); + cl_image_desc imageDesc; + memset(&imageDesc, 0x0, sizeof(cl_image_desc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 512; imageDesc.image_height = 512; cl_uint color[4] = {1,3,5,7}; - clProgramWrapper program; + clProgramWrapper program_read; + clProgramWrapper program_write; clKernelWrapper kernel_read; clKernelWrapper kernel_write; clKernelWrapper kernel_cloned; @@ -129,12 +130,16 @@ int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_c clMemWrapper img; - if( create_single_kernel_helper( context, &program, &kernel_read, 1, clone_kernel_test_img, "img_read_kernel" ) != 0 ) + if (create_single_kernel_helper(context, &program_read, &kernel_read, 1, + clone_kernel_test_img, "img_read_kernel") + != 0) { return -1; } - if( create_single_kernel_helper( context, &program, &kernel_write, 1, clone_kernel_test_img, "img_write_kernel" ) != 0 
) + if (create_single_kernel_helper(context, &program_write, &kernel_write, 1, + clone_kernel_test_img, "img_write_kernel") + != 0) { return -1; } @@ -241,6 +246,8 @@ int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queu { int error; clProgramWrapper program; + clProgramWrapper program_buf_read; + clProgramWrapper program_buf_write; clKernelWrapper kernel; clKernelWrapper kernel_pipe_read; clKernelWrapper kernel_buf_read; @@ -272,12 +279,18 @@ int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queu return -1; } - if( create_single_kernel_helper( context, &program, &kernel_buf_read, 1, clone_kernel_test_kernel, "buf_read_kernel" ) != 0 ) + if (create_single_kernel_helper(context, &program_buf_read, + &kernel_buf_read, 1, + clone_kernel_test_kernel, "buf_read_kernel") + != 0) { return -1; } - if( create_single_kernel_helper( context, &program, &kernel_buf_write, 1, clone_kernel_test_kernel, "buf_write_kernel" ) != 0 ) + if (create_single_kernel_helper( + context, &program_buf_write, &kernel_buf_write, 1, + clone_kernel_test_kernel, "buf_write_kernel") + != 0) { return -1; } diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp index 1d73a3c486..d29d90390a 100644 --- a/test_conformance/api/test_context_destructor_callback.cpp +++ b/test_conformance/api/test_context_destructor_callback.cpp @@ -52,12 +52,7 @@ int test_context_destructor_callback(cl_device_id deviceID, cl_context context, test_error(error, "Unable to set destructor callback"); // Now release the context, which SHOULD call the callbacks - error = clReleaseContext(localContext); - test_error(error, "Unable to release local context"); - - // Note: since we manually released the context, we need to set it to NULL - // to prevent a double-release - localContext = NULL; + localContext.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; 
diff --git a/test_conformance/api/test_create_kernels.cpp b/test_conformance/api/test_create_kernels.cpp index 79e01fdb8f..568e84cb6c 100644 --- a/test_conformance/api/test_create_kernels.cpp +++ b/test_conformance/api/test_create_kernels.cpp @@ -525,11 +525,10 @@ int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_co local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error); test_error( error, "clCreateCommandQueue failed"); - error = create_single_kernel_helper(local_context, &local_program, NULL, 1, &repeate_test_kernel, NULL); - test_error( error, "Unable to build test program" ); - - local_kernel = clCreateKernel(local_program, "test_kernel", &error); - test_error( error, "clCreateKernel failed"); + error = create_single_kernel_helper( + local_context, &local_program, &local_kernel, 1, + &repeate_test_kernel, "test_kernel"); + test_error(error, "Unable to create kernel"); local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error); test_error( error, "clCreateBuffer failed"); diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index f1039aeccb..dddb4a2328 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -1,5943 +1,1010 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "testBase.h" -#include -#include -#ifndef _WIN32 -#include -#endif - -#define ARG_INFO_FIELD_COUNT 5 - -#define ARG_INFO_ADDR_OFFSET 1 -#define ARG_INFO_ACCESS_OFFSET 2 -#define ARG_INFO_TYPE_QUAL_OFFSET 3 -#define ARG_INFO_TYPE_NAME_OFFSET 4 -#define ARG_INFO_ARG_NAME_OFFSET 5 - - -typedef char const * kernel_args_t[]; - -kernel_args_t required_kernel_args = { - "typedef float4 typedef_type;\n" - "\n" - "typedef struct struct_type {\n" - " float4 float4d;\n" - " int intd;\n" - "} typedef_struct_type;\n" - "\n" - "typedef union union_type {\n" - " float4 float4d;\n" - " uint4 uint4d;\n" - "} typedef_union_type;\n" - "\n" - "typedef enum enum_type {\n" - " enum_type_zero,\n" - " enum_type_one,\n" - " enum_type_two\n" - "} typedef_enum_type;\n" - "\n" - "kernel void constant_scalar_p0(constant void*constantvoidp,\n" - " constant char *constantcharp,\n" - " constant uchar* constantucharp,\n" - " constant unsigned char * constantunsignedcharp)\n" - "{}\n", - "kernel void constant_scalar_p1(constant short*constantshortp,\n" - " constant ushort *constantushortp,\n" - " constant unsigned short* constantunsignedshortp,\n" - " constant int * constantintp)\n" - "{}\n", - "kernel void constant_scalar_p2(constant uint*constantuintp,\n" - " constant unsigned int *constantunsignedintp)\n" - "{}\n", - "kernel void constant_scalar_p3(constant float *constantfloatp)\n" - "{}\n", - "\n" - "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n" - " constant char * restrict constantcharrestrictp,\n" - " constant uchar*restrict constantucharrestrictp,\n" - " constant unsigned char *restrict constantunsignedcharrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n" - " constant ushort * restrict constantushortrestrictp,\n" - " constant unsigned short*restrict constantunsignedshortrestrictp,\n" - " constant int *restrict constantintrestrictp)\n" - "{}\n", - "kernel void 
constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n" - " constant unsigned int * restrict constantunsignedintrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p3(constant float * restrict constantfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_scalar_p(global void*globalvoidp,\n" - " global char *globalcharp,\n" - " global uchar* globalucharp,\n" - " global unsigned char * globalunsignedcharp,\n" - " global short*globalshortp,\n" - " global ushort *globalushortp,\n" - " global unsigned short* globalunsignedshortp,\n" - " global int * globalintp,\n" - " global uint*globaluintp,\n" - " global unsigned int *globalunsignedintp,\n" - " global float *globalfloatp)\n" - "{}\n", - "\n" - "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n" - " global char * restrict globalcharrestrictp,\n" - " global uchar*restrict globalucharrestrictp,\n" - " global unsigned char *restrict globalunsignedcharrestrictp,\n" - " global short* restrict globalshortrestrictp,\n" - " global ushort * restrict globalushortrestrictp,\n" - " global unsigned short*restrict globalunsignedshortrestrictp,\n" - " global int *restrict globalintrestrictp,\n" - " global uint* restrict globaluintrestrictp,\n" - " global unsigned int * restrict globalunsignedintrestrictp,\n" - " global float * restrict globalfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n" - " global const char *globalconstcharp,\n" - " global const uchar* globalconstucharp,\n" - " global const unsigned char * globalconstunsignedcharp,\n" - " global const short*globalconstshortp,\n" - " global const ushort *globalconstushortp,\n" - " global const unsigned short* globalconstunsignedshortp,\n" - " global const int * globalconstintp,\n" - " global const uint*globalconstuintp,\n" - " global const unsigned int *globalconstunsignedintp,\n" - " global const float *globalconstfloatp)\n" - "{}\n", - "\n" - 
"kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n" - " global const char * restrict globalconstcharrestrictp,\n" - " global const uchar*restrict globalconstucharrestrictp,\n" - " global const unsigned char *restrict globalconstunsignedcharrestrictp,\n" - " global const short* restrict globalconstshortrestrictp,\n" - " global const ushort * restrict globalconstushortrestrictp,\n" - " global const unsigned short*restrict globalconstunsignedshortrestrictp,\n" - " global const int *restrict globalconstintrestrictp,\n" - " global const uint* restrict globalconstuintrestrictp,\n" - " global const unsigned int * restrict globalconstunsignedintrestrictp,\n" - " global const float * restrict globalconstfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n" - " global volatile char *globalvolatilecharp,\n" - " global volatile uchar* globalvolatileucharp,\n" - " global volatile unsigned char * globalvolatileunsignedcharp,\n" - " global volatile short*globalvolatileshortp,\n" - " global volatile ushort *globalvolatileushortp,\n" - " global volatile unsigned short* globalvolatileunsignedshortp,\n" - " global volatile int * globalvolatileintp,\n" - " global volatile uint*globalvolatileuintp,\n" - " global volatile unsigned int *globalvolatileunsignedintp,\n" - " global volatile float *globalvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n" - " global volatile char * restrict globalvolatilecharrestrictp,\n" - " global volatile uchar*restrict globalvolatileucharrestrictp,\n" - " global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n" - " global volatile short* restrict globalvolatileshortrestrictp,\n" - " global volatile ushort * restrict globalvolatileushortrestrictp,\n" - " global volatile unsigned short*restrict 
globalvolatileunsignedshortrestrictp,\n" - " global volatile int *restrict globalvolatileintrestrictp,\n" - " global volatile uint* restrict globalvolatileuintrestrictp,\n" - " global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n" - " global volatile float * restrict globalvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n" - " global const volatile char *globalconstvolatilecharp,\n" - " global const volatile uchar* globalconstvolatileucharp,\n" - " global const volatile unsigned char * globalconstvolatileunsignedcharp,\n" - " global const volatile short*globalconstvolatileshortp,\n" - " global const volatile ushort *globalconstvolatileushortp,\n" - " global const volatile unsigned short* globalconstvolatileunsignedshortp,\n" - " global const volatile int * globalconstvolatileintp,\n" - " global const volatile uint*globalconstvolatileuintp,\n" - " global const volatile unsigned int *globalconstvolatileunsignedintp,\n" - " global const volatile float *globalconstvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n" - " global const volatile char * restrict globalconstvolatilecharrestrictp,\n" - " global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n" - " global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n" - " global const volatile short* restrict globalconstvolatileshortrestrictp,\n" - " global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n" - " global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n" - " global const volatile int *restrict globalconstvolatileintrestrictp,\n" - " global const volatile uint* restrict globalconstvolatileuintrestrictp,\n" - " global const volatile unsigned int * restrict 
globalconstvolatileunsignedintrestrictp,\n" - " global const volatile float * restrict globalconstvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_scalar_p(local void*localvoidp,\n" - " local char *localcharp,\n" - " local uchar* localucharp,\n" - " local unsigned char * localunsignedcharp,\n" - " local short*localshortp,\n" - " local ushort *localushortp,\n" - " local unsigned short* localunsignedshortp,\n" - " local int * localintp,\n" - " local uint*localuintp,\n" - " local unsigned int *localunsignedintp,\n" - " local float *localfloatp)\n" - "{}\n", - "\n" - "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n" - " local char * restrict localcharrestrictp,\n" - " local uchar*restrict localucharrestrictp,\n" - " local unsigned char *restrict localunsignedcharrestrictp,\n" - " local short* restrict localshortrestrictp,\n" - " local ushort * restrict localushortrestrictp,\n" - " local unsigned short*restrict localunsignedshortrestrictp,\n" - " local int *restrict localintrestrictp,\n" - " local uint* restrict localuintrestrictp,\n" - " local unsigned int * restrict localunsignedintrestrictp,\n" - " local float * restrict localfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_scalar_p(local const void*localconstvoidp,\n" - " local const char *localconstcharp,\n" - " local const uchar* localconstucharp,\n" - " local const unsigned char * localconstunsignedcharp,\n" - " local const short*localconstshortp,\n" - " local const ushort *localconstushortp,\n" - " local const unsigned short* localconstunsignedshortp,\n" - " local const int * localconstintp,\n" - " local const uint*localconstuintp,\n" - " local const unsigned int *localconstunsignedintp,\n" - " local const float *localconstfloatp)\n" - "{}\n", - "\n" - "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n" - " local const char * restrict localconstcharrestrictp,\n" - " local const uchar*restrict 
localconstucharrestrictp,\n" - " local const unsigned char *restrict localconstunsignedcharrestrictp,\n" - " local const short* restrict localconstshortrestrictp,\n" - " local const ushort * restrict localconstushortrestrictp,\n" - " local const unsigned short*restrict localconstunsignedshortrestrictp,\n" - " local const int *restrict localconstintrestrictp,\n" - " local const uint* restrict localconstuintrestrictp,\n" - " local const unsigned int * restrict localconstunsignedintrestrictp,\n" - " local const float * restrict localconstfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n" - " local volatile char *localvolatilecharp,\n" - " local volatile uchar* localvolatileucharp,\n" - " local volatile unsigned char * localvolatileunsignedcharp,\n" - " local volatile short*localvolatileshortp,\n" - " local volatile ushort *localvolatileushortp,\n" - " local volatile unsigned short* localvolatileunsignedshortp,\n" - " local volatile int * localvolatileintp,\n" - " local volatile uint*localvolatileuintp,\n" - " local volatile unsigned int *localvolatileunsignedintp,\n" - " local volatile float *localvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n" - " local volatile char * restrict localvolatilecharrestrictp,\n" - " local volatile uchar*restrict localvolatileucharrestrictp,\n" - " local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n" - " local volatile short* restrict localvolatileshortrestrictp,\n" - " local volatile ushort * restrict localvolatileushortrestrictp,\n" - " local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n" - " local volatile int *restrict localvolatileintrestrictp,\n" - " local volatile uint* restrict localvolatileuintrestrictp,\n" - " local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n" - " local volatile float * 
restrict localvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n" - " local const volatile char *localconstvolatilecharp,\n" - " local const volatile uchar* localconstvolatileucharp,\n" - " local const volatile unsigned char * localconstvolatileunsignedcharp,\n" - " local const volatile short*localconstvolatileshortp,\n" - " local const volatile ushort *localconstvolatileushortp,\n" - " local const volatile unsigned short* localconstvolatileunsignedshortp,\n" - " local const volatile int * localconstvolatileintp,\n" - " local const volatile uint*localconstvolatileuintp,\n" - " local const volatile unsigned int *localconstvolatileunsignedintp,\n" - " local const volatile float *localconstvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n" - " local const volatile char * restrict localconstvolatilecharrestrictp,\n" - " local const volatile uchar*restrict localconstvolatileucharrestrictp,\n" - " local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n" - " local const volatile short* restrict localconstvolatileshortrestrictp,\n" - " local const volatile ushort * restrict localconstvolatileushortrestrictp,\n" - " local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n" - " local const volatile int *restrict localconstvolatileintrestrictp,\n" - " local const volatile uint* restrict localconstvolatileuintrestrictp,\n" - " local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n" - " local const volatile float * restrict localconstvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void scalar_d(char chard,\n" - " uchar uchard,\n" - " unsigned char unsignedchard,\n" - " short shortd,\n" - " ushort ushortd,\n" - " unsigned short unsignedshortd,\n" - " int intd,\n" - " uint 
uintd,\n" - " unsigned int unsignedintd,\n" - " float floatd)\n" - "{}\n", - "\n" - "kernel void const_scalar_d(const char constchard,\n" - " const uchar constuchard,\n" - " const unsigned char constunsignedchard,\n" - " const short constshortd,\n" - " const ushort constushortd,\n" - " const unsigned short constunsignedshortd,\n" - " const int constintd,\n" - " const uint constuintd,\n" - " const unsigned int constunsignedintd,\n" - " const float constfloatd)\n" - "{}\n", - "\n" - "kernel void private_scalar_d(private char privatechard,\n" - " private uchar privateuchard,\n" - " private unsigned char privateunsignedchard,\n" - " private short privateshortd,\n" - " private ushort privateushortd,\n" - " private unsigned short privateunsignedshortd,\n" - " private int privateintd,\n" - " private uint privateuintd,\n" - " private unsigned int privateunsignedintd,\n" - " private float privatefloatd)\n" - "{}\n", - "\n" - "kernel void private_const_scalar_d(private const char privateconstchard,\n" - " private const uchar privateconstuchard,\n" - " private const unsigned char privateconstunsignedchard,\n" - " private const short privateconstshortd,\n" - " private const ushort privateconstushortd,\n" - " private const unsigned short privateconstunsignedshortd,\n" - " private const int privateconstintd,\n" - " private const uint privateconstuintd,\n" - " private const unsigned int privateconstunsignedintd,\n" - " private const float privateconstfloatd)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p0(constant char2*constantchar2p,\n" - " constant uchar2 *constantuchar2p,\n" - " constant short2* constantshort2p,\n" - " constant ushort2 * constantushort2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p1(constant int2*constantint2p,\n" - " constant uint2 *constantuint2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p0(constant char2 *restrict 
constantchar2restrictp,\n" - " constant uchar2* restrict constantuchar2restrictp,\n" - " constant short2 * restrict constantshort2restrictp,\n" - " constant ushort2*restrict constantushort2restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n" - " constant uint2* restrict constantuint2restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector2_p(global char2*globalchar2p,\n" - " global uchar2 *globaluchar2p,\n" - " global short2* globalshort2p,\n" - " global ushort2 * globalushort2p,\n" - " global int2*globalint2p,\n" - " global uint2 *globaluint2p,\n" - " global float2*globalfloat2p)\n" - "{}\n", - "\n" - "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n" - " global uchar2* restrict globaluchar2restrictp,\n" - " global short2 * restrict globalshort2restrictp,\n" - " global ushort2*restrict globalushort2restrictp,\n" - " global int2 *restrict globalint2restrictp,\n" - " global uint2* restrict globaluint2restrictp,\n" - " global float2 *restrict globalfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n" - " global const uchar2 * globalconstuchar2p,\n" - " global const short2*globalconstshort2p,\n" - " global const ushort2 *globalconstushort2p,\n" - " global const int2* globalconstint2p,\n" - " global const uint2 * globalconstuint2p,\n" - " global const float2* globalconstfloat2p)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n" - " global const uchar2*restrict globalconstuchar2restrictp,\n" - " global const short2 *restrict globalconstshort2restrictp,\n" - " global const ushort2* restrict globalconstushort2restrictp,\n" - " global const int2 * restrict globalconstint2restrictp,\n" - " global const 
uint2*restrict globalconstuint2restrictp,\n" - " global const float2 * restrict globalconstfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n" - " global volatile uchar2 *globalvolatileuchar2p,\n" - " global volatile short2* globalvolatileshort2p,\n" - " global volatile ushort2 * globalvolatileushort2p,\n" - " global volatile int2*globalvolatileint2p,\n" - " global volatile uint2 *globalvolatileuint2p,\n" - " global volatile float2*globalvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n" - " global volatile uchar2* restrict globalvolatileuchar2restrictp,\n" - " global volatile short2 * restrict globalvolatileshort2restrictp,\n" - " global volatile ushort2*restrict globalvolatileushort2restrictp,\n" - " global volatile int2 *restrict globalvolatileint2restrictp,\n" - " global volatile uint2* restrict globalvolatileuint2restrictp,\n" - " global volatile float2 *restrict globalvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n" - " global const volatile uchar2 * globalconstvolatileuchar2p,\n" - " global const volatile short2*globalconstvolatileshort2p,\n" - " global const volatile ushort2 *globalconstvolatileushort2p,\n" - " global const volatile int2* globalconstvolatileint2p,\n" - " global const volatile uint2 * globalconstvolatileuint2p,\n" - " global const volatile float2* globalconstvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n" - " global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n" - " global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n" - " global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n" - " 
global const volatile int2 * restrict globalconstvolatileint2restrictp,\n" - " global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n" - " global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector2_p(local char2*localchar2p,\n" - " local uchar2 *localuchar2p,\n" - " local short2* localshort2p,\n" - " local ushort2 * localushort2p,\n" - " local int2*localint2p,\n" - " local uint2 *localuint2p,\n" - " local float2*localfloat2p)\n" - "{}\n", - "\n" - "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n" - " local uchar2* restrict localuchar2restrictp,\n" - " local short2 * restrict localshort2restrictp,\n" - " local ushort2*restrict localushort2restrictp,\n" - " local int2 *restrict localint2restrictp,\n" - " local uint2* restrict localuint2restrictp,\n" - " local float2 *restrict localfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n" - " local const uchar2 * localconstuchar2p,\n" - " local const short2*localconstshort2p,\n" - " local const ushort2 *localconstushort2p,\n" - " local const int2* localconstint2p,\n" - " local const uint2 * localconstuint2p,\n" - " local const float2* localconstfloat2p)\n" - "{}\n", - "\n" - "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n" - " local const uchar2*restrict localconstuchar2restrictp,\n" - " local const short2 *restrict localconstshort2restrictp,\n" - " local const ushort2* restrict localconstushort2restrictp,\n" - " local const int2 * restrict localconstint2restrictp,\n" - " local const uint2*restrict localconstuint2restrictp,\n" - " local const float2 * restrict localconstfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n" - " local volatile uchar2 *localvolatileuchar2p,\n" - " local volatile short2* localvolatileshort2p,\n" - 
" local volatile ushort2 * localvolatileushort2p,\n" - " local volatile int2*localvolatileint2p,\n" - " local volatile uint2 *localvolatileuint2p,\n" - " local volatile float2*localvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n" - " local volatile uchar2* restrict localvolatileuchar2restrictp,\n" - " local volatile short2 * restrict localvolatileshort2restrictp,\n" - " local volatile ushort2*restrict localvolatileushort2restrictp,\n" - " local volatile int2 *restrict localvolatileint2restrictp,\n" - " local volatile uint2* restrict localvolatileuint2restrictp,\n" - " local volatile float2 *restrict localvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n" - " local const volatile uchar2 * localconstvolatileuchar2p,\n" - " local const volatile short2*localconstvolatileshort2p,\n" - " local const volatile ushort2 *localconstvolatileushort2p,\n" - " local const volatile int2* localconstvolatileint2p,\n" - " local const volatile uint2 * localconstvolatileuint2p,\n" - " local const volatile float2* localconstvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n" - " local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n" - " local const volatile short2 *restrict localconstvolatileshort2restrictp,\n" - " local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n" - " local const volatile int2 * restrict localconstvolatileint2restrictp,\n" - " local const volatile uint2*restrict localconstvolatileuint2restrictp,\n" - " local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void vector2_d(char2 char2d,\n" - " uchar2 uchar2d,\n" - " short2 short2d,\n" - " ushort2 ushort2d,\n" - " int2 
int2d,\n" - " uint2 uint2d,\n" - " float2 float2d)\n" - "{}\n", - "\n" - "kernel void const_vector2_d(const char2 constchar2d,\n" - " const uchar2 constuchar2d,\n" - " const short2 constshort2d,\n" - " const ushort2 constushort2d,\n" - " const int2 constint2d,\n" - " const uint2 constuint2d,\n" - " const float2 constfloat2d)\n" - "{}\n", - "\n" - "kernel void private_vector2_d(private char2 privatechar2d,\n" - " private uchar2 privateuchar2d,\n" - " private short2 privateshort2d,\n" - " private ushort2 privateushort2d,\n" - " private int2 privateint2d,\n" - " private uint2 privateuint2d,\n" - " private float2 privatefloat2d)\n" - "{}\n", - "\n" - "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n" - " private const uchar2 privateconstuchar2d,\n" - " private const short2 privateconstshort2d,\n" - " private const ushort2 privateconstushort2d,\n" - " private const int2 privateconstint2d,\n" - " private const uint2 privateconstuint2d,\n" - " private const float2 privateconstfloat2d)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p0(constant char3*constantchar3p,\n" - " constant uchar3 *constantuchar3p,\n" - " constant short3* constantshort3p,\n" - " constant ushort3 * constantushort3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p1(constant int3*constantint3p,\n" - " constant uint3 *constantuint3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n" - " constant uchar3* restrict constantuchar3restrictp,\n" - " constant short3 * restrict constantshort3restrictp,\n" - " constant ushort3*restrict constantushort3restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n" - " constant uint3* restrict constantuint3restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p2(constant float3 *restrict 
constantfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector3_p(global char3*globalchar3p,\n" - " global uchar3 *globaluchar3p,\n" - " global short3* globalshort3p,\n" - " global ushort3 * globalushort3p,\n" - " global int3*globalint3p,\n" - " global uint3 *globaluint3p,\n" - " global float3*globalfloat3p)\n" - "{}\n", - "\n" - "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n" - " global uchar3* restrict globaluchar3restrictp,\n" - " global short3 * restrict globalshort3restrictp,\n" - " global ushort3*restrict globalushort3restrictp,\n" - " global int3 *restrict globalint3restrictp,\n" - " global uint3* restrict globaluint3restrictp,\n" - " global float3 *restrict globalfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n" - " global const uchar3 * globalconstuchar3p,\n" - " global const short3*globalconstshort3p,\n" - " global const ushort3 *globalconstushort3p,\n" - " global const int3* globalconstint3p,\n" - " global const uint3 * globalconstuint3p,\n" - " global const float3* globalconstfloat3p)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n" - " global const uchar3*restrict globalconstuchar3restrictp,\n" - " global const short3 *restrict globalconstshort3restrictp,\n" - " global const ushort3* restrict globalconstushort3restrictp,\n" - " global const int3 * restrict globalconstint3restrictp,\n" - " global const uint3*restrict globalconstuint3restrictp,\n" - " global const float3 * restrict globalconstfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n" - " global volatile uchar3 *globalvolatileuchar3p,\n" - " global volatile short3* globalvolatileshort3p,\n" - " global volatile ushort3 * globalvolatileushort3p,\n" - " global volatile int3*globalvolatileint3p,\n" - " global volatile uint3 
*globalvolatileuint3p,\n" - " global volatile float3*globalvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n" - " global volatile uchar3* restrict globalvolatileuchar3restrictp,\n" - " global volatile short3 * restrict globalvolatileshort3restrictp,\n" - " global volatile ushort3*restrict globalvolatileushort3restrictp,\n" - " global volatile int3 *restrict globalvolatileint3restrictp,\n" - " global volatile uint3* restrict globalvolatileuint3restrictp,\n" - " global volatile float3 *restrict globalvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n" - " global const volatile uchar3 * globalconstvolatileuchar3p,\n" - " global const volatile short3*globalconstvolatileshort3p,\n" - " global const volatile ushort3 *globalconstvolatileushort3p,\n" - " global const volatile int3* globalconstvolatileint3p,\n" - " global const volatile uint3 * globalconstvolatileuint3p,\n" - " global const volatile float3* globalconstvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n" - " global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n" - " global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n" - " global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n" - " global const volatile int3 * restrict globalconstvolatileint3restrictp,\n" - " global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n" - " global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector3_p(local char3*localchar3p,\n" - " local uchar3 *localuchar3p,\n" - " local short3* localshort3p,\n" - " local ushort3 * localushort3p,\n" - " local int3*localint3p,\n" - " 
local uint3 *localuint3p,\n" - " local float3*localfloat3p)\n" - "{}\n", - "\n" - "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n" - " local uchar3* restrict localuchar3restrictp,\n" - " local short3 * restrict localshort3restrictp,\n" - " local ushort3*restrict localushort3restrictp,\n" - " local int3 *restrict localint3restrictp,\n" - " local uint3* restrict localuint3restrictp,\n" - " local float3 *restrict localfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n" - " local const uchar3 * localconstuchar3p,\n" - " local const short3*localconstshort3p,\n" - " local const ushort3 *localconstushort3p,\n" - " local const int3* localconstint3p,\n" - " local const uint3 * localconstuint3p,\n" - " local const float3* localconstfloat3p)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n" - " local const uchar3*restrict localconstuchar3restrictp,\n" - " local const short3 *restrict localconstshort3restrictp,\n" - " local const ushort3* restrict localconstushort3restrictp,\n" - " local const int3 * restrict localconstint3restrictp,\n" - " local const uint3*restrict localconstuint3restrictp,\n" - " local const float3 * restrict localconstfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n" - " local volatile uchar3 *localvolatileuchar3p,\n" - " local volatile short3* localvolatileshort3p,\n" - " local volatile ushort3 * localvolatileushort3p,\n" - " local volatile int3*localvolatileint3p,\n" - " local volatile uint3 *localvolatileuint3p,\n" - " local volatile float3*localvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n" - " local volatile uchar3* restrict localvolatileuchar3restrictp,\n" - " local volatile short3 * restrict 
localvolatileshort3restrictp,\n" - " local volatile ushort3*restrict localvolatileushort3restrictp,\n" - " local volatile int3 *restrict localvolatileint3restrictp,\n" - " local volatile uint3* restrict localvolatileuint3restrictp,\n" - " local volatile float3 *restrict localvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n" - " local const volatile uchar3 * localconstvolatileuchar3p,\n" - " local const volatile short3*localconstvolatileshort3p,\n" - " local const volatile ushort3 *localconstvolatileushort3p,\n" - " local const volatile int3* localconstvolatileint3p,\n" - " local const volatile uint3 * localconstvolatileuint3p,\n" - " local const volatile float3* localconstvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n" - " local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n" - " local const volatile short3 *restrict localconstvolatileshort3restrictp,\n" - " local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n" - " local const volatile int3 * restrict localconstvolatileint3restrictp,\n" - " local const volatile uint3*restrict localconstvolatileuint3restrictp,\n" - " local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void vector3_d(char3 char3d,\n" - " uchar3 uchar3d,\n" - " short3 short3d,\n" - " ushort3 ushort3d,\n" - " int3 int3d,\n" - " uint3 uint3d,\n" - " float3 float3d)\n" - "{}\n", - "\n" - "kernel void const_vector3_d(const char3 constchar3d,\n" - " const uchar3 constuchar3d,\n" - " const short3 constshort3d,\n" - " const ushort3 constushort3d,\n" - " const int3 constint3d,\n" - " const uint3 constuint3d,\n" - " const float3 constfloat3d)\n" - "{}\n", - "\n" - "kernel void private_vector3_d(private char3 privatechar3d,\n" - " private uchar3 
privateuchar3d,\n" - " private short3 privateshort3d,\n" - " private ushort3 privateushort3d,\n" - " private int3 privateint3d,\n" - " private uint3 privateuint3d,\n" - " private float3 privatefloat3d)\n" - "{}\n", - "\n" - "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n" - " private const uchar3 privateconstuchar3d,\n" - " private const short3 privateconstshort3d,\n" - " private const ushort3 privateconstushort3d,\n" - " private const int3 privateconstint3d,\n" - " private const uint3 privateconstuint3d,\n" - " private const float3 privateconstfloat3d)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p0(constant char4*constantchar4p,\n" - " constant uchar4 *constantuchar4p,\n" - " constant short4* constantshort4p,\n" - " constant ushort4 * constantushort4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p1(constant int4*constantint4p,\n" - " constant uint4 *constantuint4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n" - " constant uchar4* restrict constantuchar4restrictp,\n" - " constant short4 * restrict constantshort4restrictp,\n" - " constant ushort4*restrict constantushort4restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n" - " constant uint4* restrict constantuint4restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector4_p(global char4*globalchar4p,\n" - " global uchar4 *globaluchar4p,\n" - " global short4* globalshort4p,\n" - " global ushort4 * globalushort4p,\n" - " global int4*globalint4p,\n" - " global uint4 *globaluint4p,\n" - " global float4*globalfloat4p)\n" - "{}\n", - "\n" - "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n" - " 
global uchar4* restrict globaluchar4restrictp,\n" - " global short4 * restrict globalshort4restrictp,\n" - " global ushort4*restrict globalushort4restrictp,\n" - " global int4 *restrict globalint4restrictp,\n" - " global uint4* restrict globaluint4restrictp,\n" - " global float4 *restrict globalfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n" - " global const uchar4 * globalconstuchar4p,\n" - " global const short4*globalconstshort4p,\n" - " global const ushort4 *globalconstushort4p,\n" - " global const int4* globalconstint4p,\n" - " global const uint4 * globalconstuint4p,\n" - " global const float4* globalconstfloat4p)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n" - " global const uchar4*restrict globalconstuchar4restrictp,\n" - " global const short4 *restrict globalconstshort4restrictp,\n" - " global const ushort4* restrict globalconstushort4restrictp,\n" - " global const int4 * restrict globalconstint4restrictp,\n" - " global const uint4*restrict globalconstuint4restrictp,\n" - " global const float4 * restrict globalconstfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n" - " global volatile uchar4 *globalvolatileuchar4p,\n" - " global volatile short4* globalvolatileshort4p,\n" - " global volatile ushort4 * globalvolatileushort4p,\n" - " global volatile int4*globalvolatileint4p,\n" - " global volatile uint4 *globalvolatileuint4p,\n" - " global volatile float4*globalvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n" - " global volatile uchar4* restrict globalvolatileuchar4restrictp,\n" - " global volatile short4 * restrict globalvolatileshort4restrictp,\n" - " global volatile ushort4*restrict globalvolatileushort4restrictp,\n" - " global volatile 
int4 *restrict globalvolatileint4restrictp,\n" - " global volatile uint4* restrict globalvolatileuint4restrictp,\n" - " global volatile float4 *restrict globalvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n" - " global const volatile uchar4 * globalconstvolatileuchar4p,\n" - " global const volatile short4*globalconstvolatileshort4p,\n" - " global const volatile ushort4 *globalconstvolatileushort4p,\n" - " global const volatile int4* globalconstvolatileint4p,\n" - " global const volatile uint4 * globalconstvolatileuint4p,\n" - " global const volatile float4* globalconstvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n" - " global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n" - " global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n" - " global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n" - " global const volatile int4 * restrict globalconstvolatileint4restrictp,\n" - " global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n" - " global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector4_p(local char4*localchar4p,\n" - " local uchar4 *localuchar4p,\n" - " local short4* localshort4p,\n" - " local ushort4 * localushort4p,\n" - " local int4*localint4p,\n" - " local uint4 *localuint4p,\n" - " local float4*localfloat4p)\n" - "{}\n", - "\n" - "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n" - " local uchar4* restrict localuchar4restrictp,\n" - " local short4 * restrict localshort4restrictp,\n" - " local ushort4*restrict localushort4restrictp,\n" - " local int4 *restrict localint4restrictp,\n" - " local uint4* restrict localuint4restrictp,\n" - " local float4 
*restrict localfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n" - " local const uchar4 * localconstuchar4p,\n" - " local const short4*localconstshort4p,\n" - " local const ushort4 *localconstushort4p,\n" - " local const int4* localconstint4p,\n" - " local const uint4 * localconstuint4p,\n" - " local const float4* localconstfloat4p)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n" - " local const uchar4*restrict localconstuchar4restrictp,\n" - " local const short4 *restrict localconstshort4restrictp,\n" - " local const ushort4* restrict localconstushort4restrictp,\n" - " local const int4 * restrict localconstint4restrictp,\n" - " local const uint4*restrict localconstuint4restrictp,\n" - " local const float4 * restrict localconstfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n" - " local volatile uchar4 *localvolatileuchar4p,\n" - " local volatile short4* localvolatileshort4p,\n" - " local volatile ushort4 * localvolatileushort4p,\n" - " local volatile int4*localvolatileint4p,\n" - " local volatile uint4 *localvolatileuint4p,\n" - " local volatile float4*localvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n" - " local volatile uchar4* restrict localvolatileuchar4restrictp,\n" - " local volatile short4 * restrict localvolatileshort4restrictp,\n" - " local volatile ushort4*restrict localvolatileushort4restrictp,\n" - " local volatile int4 *restrict localvolatileint4restrictp,\n" - " local volatile uint4* restrict localvolatileuint4restrictp,\n" - " local volatile float4 *restrict localvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n" - " local const volatile uchar4 * 
localconstvolatileuchar4p,\n" - " local const volatile short4*localconstvolatileshort4p,\n" - " local const volatile ushort4 *localconstvolatileushort4p,\n" - " local const volatile int4* localconstvolatileint4p,\n" - " local const volatile uint4 * localconstvolatileuint4p,\n" - " local const volatile float4* localconstvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n" - " local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n" - " local const volatile short4 *restrict localconstvolatileshort4restrictp,\n" - " local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n" - " local const volatile int4 * restrict localconstvolatileint4restrictp,\n" - " local const volatile uint4*restrict localconstvolatileuint4restrictp,\n" - " local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void vector4_d(char4 char4d,\n" - " uchar4 uchar4d,\n" - " short4 short4d,\n" - " ushort4 ushort4d,\n" - " int4 int4d,\n" - " uint4 uint4d,\n" - " float4 float4d)\n" - "{}\n", - "\n" - "kernel void const_vector4_d(const char4 constchar4d,\n" - " const uchar4 constuchar4d,\n" - " const short4 constshort4d,\n" - " const ushort4 constushort4d,\n" - " const int4 constint4d,\n" - " const uint4 constuint4d,\n" - " const float4 constfloat4d)\n" - "{}\n", - "\n" - "kernel void private_vector4_d(private char4 privatechar4d,\n" - " private uchar4 privateuchar4d,\n" - " private short4 privateshort4d,\n" - " private ushort4 privateushort4d,\n" - " private int4 privateint4d,\n" - " private uint4 privateuint4d,\n" - " private float4 privatefloat4d)\n" - "{}\n", - "\n" - "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n" - " private const uchar4 privateconstuchar4d,\n" - " private const short4 privateconstshort4d,\n" - " private const ushort4 privateconstushort4d,\n" - " private 
const int4 privateconstint4d,\n" - " private const uint4 privateconstuint4d,\n" - " private const float4 privateconstfloat4d)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p0(constant char8*constantchar8p,\n" - " constant uchar8 *constantuchar8p,\n" - " constant short8* constantshort8p,\n" - " constant ushort8 * constantushort8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p1(constant int8*constantint8p,\n" - " constant uint8 *constantuint8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n" - " constant uchar8* restrict constantuchar8restrictp,\n" - " constant short8 * restrict constantshort8restrictp,\n" - " constant ushort8*restrict constantushort8restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n" - " constant uint8* restrict constantuint8restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector8_p(global char8*globalchar8p,\n" - " global uchar8 *globaluchar8p,\n" - " global short8* globalshort8p,\n" - " global ushort8 * globalushort8p,\n" - " global int8*globalint8p,\n" - " global uint8 *globaluint8p,\n" - " global float8*globalfloat8p)\n" - "{}\n", - "\n" - "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n" - " global uchar8* restrict globaluchar8restrictp,\n" - " global short8 * restrict globalshort8restrictp,\n" - " global ushort8*restrict globalushort8restrictp,\n" - " global int8 *restrict globalint8restrictp,\n" - " global uint8* restrict globaluint8restrictp,\n" - " global float8 *restrict globalfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n" - " global const uchar8 * globalconstuchar8p,\n" - " 
global const short8*globalconstshort8p,\n" - " global const ushort8 *globalconstushort8p,\n" - " global const int8* globalconstint8p,\n" - " global const uint8 * globalconstuint8p,\n" - " global const float8* globalconstfloat8p)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n" - " global const uchar8*restrict globalconstuchar8restrictp,\n" - " global const short8 *restrict globalconstshort8restrictp,\n" - " global const ushort8* restrict globalconstushort8restrictp,\n" - " global const int8 * restrict globalconstint8restrictp,\n" - " global const uint8*restrict globalconstuint8restrictp,\n" - " global const float8 * restrict globalconstfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n" - " global volatile uchar8 *globalvolatileuchar8p,\n" - " global volatile short8* globalvolatileshort8p,\n" - " global volatile ushort8 * globalvolatileushort8p,\n" - " global volatile int8*globalvolatileint8p,\n" - " global volatile uint8 *globalvolatileuint8p,\n" - " global volatile float8*globalvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n" - " global volatile uchar8* restrict globalvolatileuchar8restrictp,\n" - " global volatile short8 * restrict globalvolatileshort8restrictp,\n" - " global volatile ushort8*restrict globalvolatileushort8restrictp,\n" - " global volatile int8 *restrict globalvolatileint8restrictp,\n" - " global volatile uint8* restrict globalvolatileuint8restrictp,\n" - " global volatile float8 *restrict globalvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n" - " global const volatile uchar8 * globalconstvolatileuchar8p,\n" - " global const volatile short8*globalconstvolatileshort8p,\n" - " global const volatile 
ushort8 *globalconstvolatileushort8p,\n" - " global const volatile int8* globalconstvolatileint8p,\n" - " global const volatile uint8 * globalconstvolatileuint8p,\n" - " global const volatile float8* globalconstvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n" - " global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n" - " global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n" - " global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n" - " global const volatile int8 * restrict globalconstvolatileint8restrictp,\n" - " global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n" - " global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector8_p(local char8*localchar8p,\n" - " local uchar8 *localuchar8p,\n" - " local short8* localshort8p,\n" - " local ushort8 * localushort8p,\n" - " local int8*localint8p,\n" - " local uint8 *localuint8p,\n" - " local float8*localfloat8p)\n" - "{}\n", - "\n" - "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n" - " local uchar8* restrict localuchar8restrictp,\n" - " local short8 * restrict localshort8restrictp,\n" - " local ushort8*restrict localushort8restrictp,\n" - " local int8 *restrict localint8restrictp,\n" - " local uint8* restrict localuint8restrictp,\n" - " local float8 *restrict localfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n" - " local const uchar8 * localconstuchar8p,\n" - " local const short8*localconstshort8p,\n" - " local const ushort8 *localconstushort8p,\n" - " local const int8* localconstint8p,\n" - " local const uint8 * localconstuint8p,\n" - " local const float8* localconstfloat8p)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_restrict_p(local 
const char8 * restrict localconstchar8restrictp,\n" - " local const uchar8*restrict localconstuchar8restrictp,\n" - " local const short8 *restrict localconstshort8restrictp,\n" - " local const ushort8* restrict localconstushort8restrictp,\n" - " local const int8 * restrict localconstint8restrictp,\n" - " local const uint8*restrict localconstuint8restrictp,\n" - " local const float8 * restrict localconstfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n" - " local volatile uchar8 *localvolatileuchar8p,\n" - " local volatile short8* localvolatileshort8p,\n" - " local volatile ushort8 * localvolatileushort8p,\n" - " local volatile int8*localvolatileint8p,\n" - " local volatile uint8 *localvolatileuint8p,\n" - " local volatile float8*localvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n" - " local volatile uchar8* restrict localvolatileuchar8restrictp,\n" - " local volatile short8 * restrict localvolatileshort8restrictp,\n" - " local volatile ushort8*restrict localvolatileushort8restrictp,\n" - " local volatile int8 *restrict localvolatileint8restrictp,\n" - " local volatile uint8* restrict localvolatileuint8restrictp,\n" - " local volatile float8 *restrict localvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n" - " local const volatile uchar8 * localconstvolatileuchar8p,\n" - " local const volatile short8*localconstvolatileshort8p,\n" - " local const volatile ushort8 *localconstvolatileushort8p,\n" - " local const volatile int8* localconstvolatileint8p,\n" - " local const volatile uint8 * localconstvolatileuint8p,\n" - " local const volatile float8* localconstvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict 
localconstvolatilechar8restrictp,\n" - " local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n" - " local const volatile short8 *restrict localconstvolatileshort8restrictp,\n" - " local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n" - " local const volatile int8 * restrict localconstvolatileint8restrictp,\n" - " local const volatile uint8*restrict localconstvolatileuint8restrictp,\n" - " local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void vector8_d(char8 char8d,\n" - " uchar8 uchar8d,\n" - " short8 short8d,\n" - " ushort8 ushort8d,\n" - " int8 int8d,\n" - " uint8 uint8d,\n" - " float8 float8d)\n" - "{}\n", - "\n" - "kernel void const_vector8_d(const char8 constchar8d,\n" - " const uchar8 constuchar8d,\n" - " const short8 constshort8d,\n" - " const ushort8 constushort8d,\n" - " const int8 constint8d,\n" - " const uint8 constuint8d,\n" - " const float8 constfloat8d)\n" - "{}\n", - "\n" - "kernel void private_vector8_d(private char8 privatechar8d,\n" - " private uchar8 privateuchar8d,\n" - " private short8 privateshort8d,\n" - " private ushort8 privateushort8d,\n" - " private int8 privateint8d,\n" - " private uint8 privateuint8d,\n" - " private float8 privatefloat8d)\n" - "{}\n", - "\n" - "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n" - " private const uchar8 privateconstuchar8d,\n" - " private const short8 privateconstshort8d,\n" - " private const ushort8 privateconstushort8d,\n" - " private const int8 privateconstint8d,\n" - " private const uint8 privateconstuint8d,\n" - " private const float8 privateconstfloat8d)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p0(constant char16*constantchar16p,\n" - " constant uchar16 *constantuchar16p,\n" - " constant short16* constantshort16p,\n" - " constant ushort16 * constantushort16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p1(constant int16*constantint16p,\n" - " constant 
uint16 *constantuint16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n" - " constant uchar16* restrict constantuchar16restrictp,\n" - " constant short16 * restrict constantshort16restrictp,\n" - " constant ushort16*restrict constantushort16restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n" - " constant uint16* restrict constantuint16restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector16_p(global char16*globalchar16p,\n" - " global uchar16 *globaluchar16p,\n" - " global short16* globalshort16p,\n" - " global ushort16 * globalushort16p,\n" - " global int16*globalint16p,\n" - " global uint16 *globaluint16p,\n" - " global float16*globalfloat16p)\n" - "{}\n", - "\n" - "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n" - " global uchar16* restrict globaluchar16restrictp,\n" - " global short16 * restrict globalshort16restrictp,\n" - " global ushort16*restrict globalushort16restrictp,\n" - " global int16 *restrict globalint16restrictp,\n" - " global uint16* restrict globaluint16restrictp,\n" - " global float16 *restrict globalfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n" - " global const uchar16 * globalconstuchar16p,\n" - " global const short16*globalconstshort16p,\n" - " global const ushort16 *globalconstushort16p,\n" - " global const int16* globalconstint16p,\n" - " global const uint16 * globalconstuint16p,\n" - " global const float16* globalconstfloat16p)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n" - " global const 
uchar16*restrict globalconstuchar16restrictp,\n" - " global const short16 *restrict globalconstshort16restrictp,\n" - " global const ushort16* restrict globalconstushort16restrictp,\n" - " global const int16 * restrict globalconstint16restrictp,\n" - " global const uint16*restrict globalconstuint16restrictp,\n" - " global const float16 * restrict globalconstfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n" - " global volatile uchar16 *globalvolatileuchar16p,\n" - " global volatile short16* globalvolatileshort16p,\n" - " global volatile ushort16 * globalvolatileushort16p,\n" - " global volatile int16*globalvolatileint16p,\n" - " global volatile uint16 *globalvolatileuint16p,\n" - " global volatile float16*globalvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n" - " global volatile uchar16* restrict globalvolatileuchar16restrictp,\n" - " global volatile short16 * restrict globalvolatileshort16restrictp,\n" - " global volatile ushort16*restrict globalvolatileushort16restrictp,\n" - " global volatile int16 *restrict globalvolatileint16restrictp,\n" - " global volatile uint16* restrict globalvolatileuint16restrictp,\n" - " global volatile float16 *restrict globalvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n" - " global const volatile uchar16 * globalconstvolatileuchar16p,\n" - " global const volatile short16*globalconstvolatileshort16p,\n" - " global const volatile ushort16 *globalconstvolatileushort16p,\n" - " global const volatile int16* globalconstvolatileint16p,\n" - " global const volatile uint16 * globalconstvolatileuint16p,\n" - " global const volatile float16* globalconstvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector16_restrict_p(global 
const volatile char16 * restrict globalconstvolatilechar16restrictp,\n" - " global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n" - " global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n" - " global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n" - " global const volatile int16 * restrict globalconstvolatileint16restrictp,\n" - " global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n" - " global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector16_p(local char16*localchar16p,\n" - " local uchar16 *localuchar16p,\n" - " local short16* localshort16p,\n" - " local ushort16 * localushort16p,\n" - " local int16*localint16p,\n" - " local uint16 *localuint16p,\n" - " local float16*localfloat16p)\n" - "{}\n", - "\n" - "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n" - " local uchar16* restrict localuchar16restrictp,\n" - " local short16 * restrict localshort16restrictp,\n" - " local ushort16*restrict localushort16restrictp,\n" - " local int16 *restrict localint16restrictp,\n" - " local uint16* restrict localuint16restrictp,\n" - " local float16 *restrict localfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n" - " local const uchar16 * localconstuchar16p,\n" - " local const short16*localconstshort16p,\n" - " local const ushort16 *localconstushort16p,\n" - " local const int16* localconstint16p,\n" - " local const uint16 * localconstuint16p,\n" - " local const float16* localconstfloat16p)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n" - " local const uchar16*restrict localconstuchar16restrictp,\n" - " local const short16 *restrict localconstshort16restrictp,\n" - " local const ushort16* restrict localconstushort16restrictp,\n" - " 
local const int16 * restrict localconstint16restrictp,\n" - " local const uint16*restrict localconstuint16restrictp,\n" - " local const float16 * restrict localconstfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n" - " local volatile uchar16 *localvolatileuchar16p,\n" - " local volatile short16* localvolatileshort16p,\n" - " local volatile ushort16 * localvolatileushort16p,\n" - " local volatile int16*localvolatileint16p,\n" - " local volatile uint16 *localvolatileuint16p,\n" - " local volatile float16*localvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n" - " local volatile uchar16* restrict localvolatileuchar16restrictp,\n" - " local volatile short16 * restrict localvolatileshort16restrictp,\n" - " local volatile ushort16*restrict localvolatileushort16restrictp,\n" - " local volatile int16 *restrict localvolatileint16restrictp,\n" - " local volatile uint16* restrict localvolatileuint16restrictp,\n" - " local volatile float16 *restrict localvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n" - " local const volatile uchar16 * localconstvolatileuchar16p,\n" - " local const volatile short16*localconstvolatileshort16p,\n" - " local const volatile ushort16 *localconstvolatileushort16p,\n" - " local const volatile int16* localconstvolatileint16p,\n" - " local const volatile uint16 * localconstvolatileuint16p,\n" - " local const volatile float16* localconstvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n" - " local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n" - " local const volatile short16 *restrict localconstvolatileshort16restrictp,\n" - " local 
const volatile ushort16* restrict localconstvolatileushort16restrictp,\n" - " local const volatile int16 * restrict localconstvolatileint16restrictp,\n" - " local const volatile uint16*restrict localconstvolatileuint16restrictp,\n" - " local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void vector16_d(char16 char16d,\n" - " uchar16 uchar16d,\n" - " short16 short16d,\n" - " ushort16 ushort16d,\n" - " int16 int16d,\n" - " uint16 uint16d,\n" - " float16 float16d)\n" - "{}\n", - "\n" - "kernel void const_vector16_d(const char16 constchar16d,\n" - " const uchar16 constuchar16d,\n" - " const short16 constshort16d,\n" - " const ushort16 constushort16d,\n" - " const int16 constint16d,\n" - " const uint16 constuint16d,\n" - " const float16 constfloat16d)\n" - "{}\n", - "\n" - "kernel void private_vector16_d(private char16 privatechar16d,\n" - " private uchar16 privateuchar16d,\n" - " private short16 privateshort16d,\n" - " private ushort16 privateushort16d,\n" - " private int16 privateint16d,\n" - " private uint16 privateuint16d,\n" - " private float16 privatefloat16d)\n" - "{}\n", - "\n" - "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n" - " private const uchar16 privateconstuchar16d,\n" - " private const short16 privateconstshort16d,\n" - " private const ushort16 privateconstushort16d,\n" - " private const int16 privateconstint16d,\n" - " private const uint16 privateconstuint16d,\n" - " private const float16 privateconstfloat16d)\n" - "{}\n", - "\n" - "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n" - " constant struct struct_type *constantstructstruct_typep,\n" - " constant typedef_struct_type* constanttypedef_struct_typep,\n" - " constant union union_type * constantunionunion_typep)\n" - "{}\n", - "\n" - "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n" - " constant enum enum_type *constantenumenum_typep,\n" 
- " constant typedef_enum_type* constanttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n" - " constant struct struct_type*restrict constantstructstruct_typerestrictp,\n" - " constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n" - " constant union union_type* restrict constantunionunion_typerestrictp)\n" - "{}\n", - "\n" - "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n" - " constant enum enum_type*restrict constantenumenum_typerestrictp,\n" - " constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n" - " global struct struct_type *globalstructstruct_typep,\n" - " global typedef_struct_type* globaltypedef_struct_typep,\n" - " global union union_type * globalunionunion_typep,\n" - " global typedef_union_type*globaltypedef_union_typep,\n" - " global enum enum_type *globalenumenum_typep,\n" - " global typedef_enum_type* globaltypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n" - " global struct struct_type*restrict globalstructstruct_typerestrictp,\n" - " global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n" - " global union union_type* restrict globalunionunion_typerestrictp,\n" - " global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n" - " global enum enum_type*restrict globalenumenum_typerestrictp,\n" - " global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n" - " global const struct struct_type * globalconststructstruct_typep,\n" - " global const typedef_struct_type*globalconsttypedef_struct_typep,\n" - 
" global const union union_type *globalconstunionunion_typep,\n" - " global const typedef_union_type* globalconsttypedef_union_typep,\n" - " global const enum enum_type * globalconstenumenum_typep,\n" - " global const typedef_enum_type*globalconsttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n" - " global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n" - " global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n" - " global const union union_type*restrict globalconstunionunion_typerestrictp,\n" - " global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n" - " global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n" - " global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n" - " global volatile struct struct_type *globalvolatilestructstruct_typep,\n" - " global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n" - " global volatile union union_type * globalvolatileunionunion_typep,\n" - " global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n" - " global volatile enum enum_type *globalvolatileenumenum_typep,\n" - " global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n" - " global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n" - " global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n" - " global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n" - " global volatile typedef_union_type * restrict 
globalvolatiletypedef_union_typerestrictp,\n" - " global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n" - " global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n" - " global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n" - " global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n" - " global const volatile union union_type *globalconstvolatileunionunion_typep,\n" - " global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n" - " global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n" - " global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n" - " global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n" - " global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n" - " global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n" - " global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n" - " global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n" - " global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n" - " local struct struct_type *localstructstruct_typep,\n" - " local typedef_struct_type* localtypedef_struct_typep,\n" - " local union union_type * localunionunion_typep,\n" - " local typedef_union_type*localtypedef_union_typep,\n" - " local 
enum enum_type *localenumenum_typep,\n" - " local typedef_enum_type* localtypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n" - " local struct struct_type*restrict localstructstruct_typerestrictp,\n" - " local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n" - " local union union_type* restrict localunionunion_typerestrictp,\n" - " local typedef_union_type * restrict localtypedef_union_typerestrictp,\n" - " local enum enum_type*restrict localenumenum_typerestrictp,\n" - " local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n" - " local const struct struct_type * localconststructstruct_typep,\n" - " local const typedef_struct_type*localconsttypedef_struct_typep,\n" - " local const union union_type *localconstunionunion_typep,\n" - " local const typedef_union_type* localconsttypedef_union_typep,\n" - " local const enum enum_type * localconstenumenum_typep,\n" - " local const typedef_enum_type*localconsttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n" - " local const struct struct_type* restrict localconststructstruct_typerestrictp,\n" - " local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n" - " local const union union_type*restrict localconstunionunion_typerestrictp,\n" - " local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n" - " local const enum enum_type* restrict localconstenumenum_typerestrictp,\n" - " local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n" - " local volatile struct struct_type 
*localvolatilestructstruct_typep,\n" - " local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n" - " local volatile union union_type * localvolatileunionunion_typep,\n" - " local volatile typedef_union_type*localvolatiletypedef_union_typep,\n" - " local volatile enum enum_type *localvolatileenumenum_typep,\n" - " local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n" - " local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n" - " local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n" - " local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n" - " local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n" - " local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n" - " local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n" - " local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n" - " local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n" - " local const volatile union union_type *localconstvolatileunionunion_typep,\n" - " local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n" - " local const volatile enum enum_type * localconstvolatileenumenum_typep,\n" - " local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n" - " local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n" - " 
local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n" - " local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n" - " local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n" - " local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n" - " local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void derived_d(typedef_type typedef_typed,\n" - " struct struct_type structstruct_typed,\n" - " typedef_struct_type typedef_struct_typed,\n" - " union union_type unionunion_typed,\n" - " typedef_union_type typedef_union_typed,\n" - " enum enum_type enumenum_typed,\n" - " typedef_enum_type typedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void const_derived_d(const typedef_type consttypedef_typed,\n" - " const struct struct_type conststructstruct_typed,\n" - " const typedef_struct_type consttypedef_struct_typed,\n" - " const union union_type constunionunion_typed,\n" - " const typedef_union_type consttypedef_union_typed,\n" - " const enum enum_type constenumenum_typed,\n" - " const typedef_enum_type consttypedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n" - " private struct struct_type privatestructstruct_typed,\n" - " private typedef_struct_type privatetypedef_struct_typed,\n" - " private union union_type privateunionunion_typed,\n" - " private typedef_union_type privatetypedef_union_typed,\n" - " private enum enum_type privateenumenum_typed,\n" - " private typedef_enum_type privatetypedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n" - " private const struct struct_type privateconststructstruct_typed,\n" - " private const typedef_struct_type privateconsttypedef_struct_typed,\n" - " private const 
union union_type privateconstunionunion_typed,\n" - " private const typedef_union_type privateconsttypedef_union_typed,\n" - " private const enum enum_type privateconstenumenum_typed,\n" - " private const typedef_enum_type privateconsttypedef_enum_typed)\n" - "{}\n", - "\n" -}; - -const char * required_arg_info[][72] = { - // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 - { - "constant_scalar_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp", - NULL - }, - { - "constant_scalar_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp", - NULL - }, - { - "constant_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp", - NULL - }, - { - "constant_scalar_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp", - NULL - }, - { - "constant_scalar_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp", - NULL - }, - { - "constant_scalar_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp", - NULL - }, - { - 
"constant_scalar_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp", - NULL - }, - { - "constant_scalar_restrict_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp", - NULL - }, - { - "global_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp", - NULL - }, - { - "global_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp", - NULL - }, - { - "global_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp", - NULL - }, - { - "global_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp", - NULL - }, - { - "global_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp", - NULL - }, - { - "global_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp", - NULL - }, - { - "global_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", 
"globalconstvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp", - NULL - }, - { - "global_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"short*", "globalconstvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp", - NULL - }, - { - "local_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp", - NULL - }, - { - "local_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp", - NULL - }, - { - "local_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp", - NULL - }, - { - "local_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp", - NULL - }, - { - "local_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp", - NULL - }, - { - "local_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp", 
- (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp", - NULL - }, - { - "local_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), 
"uchar*", "localconstvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp", - NULL - }, - { - "local_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", 
"localconstvolatilefloatrestrictp", - NULL - }, - { - "scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd", - NULL - }, - { - "const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd", - NULL - }, - { - "private_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd", - NULL - }, - { - "private_const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd", - NULL - }, - { - "constant_vector2_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p", - NULL - }, - { - "constant_vector2_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p", - NULL - }, - { - "constant_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p", - NULL - }, - { - "constant_vector2_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp", - NULL - }, - { - "constant_vector2_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp", - NULL - }, - { - "constant_vector2_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp", - NULL - }, - { - "global_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p", - NULL - }, - { - "global_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp", - NULL - }, - { - "global_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p", - NULL - }, - { - "global_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp", - NULL - }, - { - "global_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p", - NULL - }, - { - "global_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp", - NULL - }, - { - "global_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p", - NULL - }, - { - "global_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp", - NULL - }, - { - "local_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p", - NULL - }, - { - "local_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp", - NULL - }, - { - "local_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p", - NULL - }, - { - "local_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp", - NULL - }, - { - "local_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p", - NULL - }, - { - "local_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp", - NULL - }, - { - "local_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p", - (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p", - NULL - }, - { - "local_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp", - NULL - }, - { - "vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d", - NULL - }, - { - "const_vector2_d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d", - NULL - }, - { - "private_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d", - NULL - }, - { - "private_const_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d", - NULL - }, - { - "constant_vector3_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p", - NULL - }, - { - "constant_vector3_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p", - NULL - }, - { - "constant_vector3_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p", - NULL - }, - { - "constant_vector3_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp", - NULL - }, - { - "constant_vector3_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp", - NULL - }, - { - "constant_vector3_restrict_p2", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp", - NULL - }, - { - "global_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p", - NULL - }, - { - "global_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", 
"globalushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp", - NULL - }, - { - "global_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p", - NULL - }, - { - "global_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp", - NULL - }, - { - "global_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", 
"globalvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p", - NULL - }, - { - "global_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp", - NULL - }, - { - "global_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p", - NULL - }, - { - "global_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"short3*", "globalconstvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp", - NULL - }, - { - "local_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p", - NULL - }, - { - "local_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp", - NULL - }, - { - "local_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p", - NULL - }, - { - "local_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp", - NULL - }, - { - "local_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p", - NULL - }, - { - "local_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp", - NULL - }, - { - "local_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", 
"localconstvolatilefloat3p", - NULL - }, - { - "local_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp", - NULL - }, - { - "vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d", - NULL - }, - { - "const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d", - NULL - }, - { - "private_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d", - NULL - }, - { - "private_const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d", - NULL - }, - { - "constant_vector4_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p", - NULL - }, - { - "constant_vector4_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p", - NULL - }, - { - "constant_vector4_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p", - NULL - }, - { - "constant_vector4_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp", - NULL - }, - { - "constant_vector4_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp", - NULL - }, - { - "constant_vector4_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp", - NULL - }, - { - "global_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p", - NULL - }, - { - "global_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp", - NULL - }, - { - "global_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p", - NULL - }, - { - "global_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp", - NULL - }, - { - 
"global_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p", - NULL - }, - { - "global_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp", - NULL - }, - { - "global_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p", - NULL - }, - { - "global_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp", - NULL - }, - { - "local_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p", - NULL - }, - { - "local_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", 
"localuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp", - NULL - }, - { - "local_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p", - NULL - }, - { - "local_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp", - NULL - }, - { - "local_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p", - NULL - }, - { - 
"local_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp", - NULL - }, - { - "local_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p", - NULL - }, - { - "local_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp", - NULL - }, - { - "vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d", - NULL - }, - { - "const_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", 
"constuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d", - NULL - }, - { - "private_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d", - NULL - }, - { - "private_const_vector4_d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d", - NULL - }, - { - "constant_vector8_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p", - NULL - }, - { - "constant_vector8_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p", - 
(const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p", - NULL - }, - { - "constant_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p", - NULL - }, - { - "constant_vector8_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp", - NULL - }, - { - "constant_vector8_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp", - NULL - }, - { - "constant_vector8_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp", - NULL - }, - { - "global_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p", - NULL - }, - { - "global_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp", - NULL - }, - { - "global_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p", - NULL - }, - { - "global_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp", - NULL - }, - { - "global_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p", - NULL - }, - { - "global_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp", - NULL - }, - { - "global_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), 
"uchar8*", "globalconstvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p", - NULL - }, - { - "global_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", 
"globalconstvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp", - NULL - }, - { - "local_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p", - NULL - }, - { - "local_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp", - NULL - }, - { - "local_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p", - NULL - }, - { - "local_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp", - NULL - }, - { - "local_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", 
"localvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p", - NULL - }, - { - "local_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp", - NULL - }, - { - "local_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p", - NULL - }, - { - "local_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", 
"localconstvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp", - NULL - }, - { - "vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d", - NULL - }, - { - "const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d", - NULL - }, - { - "private_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d", - NULL - }, - { - "private_const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d", - NULL - }, - { - "constant_vector16_p0", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p", - NULL - }, - { - "constant_vector16_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p", - NULL - }, - { - "constant_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p", - NULL - }, - { - "constant_vector16_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp", - NULL - }, - { - "constant_vector16_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp", - NULL - }, - { - "constant_vector16_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp", - NULL - }, - { - "global_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p", - NULL - }, - { - "global_vector16_restrict_p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp", - NULL - }, - { - "global_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), 
"int16*", "globalconstint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p", - NULL - }, - { - "global_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp", - NULL - }, - { - "global_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", 
"globalvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p", - NULL - }, - { - "global_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp", - NULL - }, - { - "global_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", 
"globalconstvolatilefloat16p", - NULL - }, - { - "global_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp", - NULL - }, - { - "local_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p", - NULL - }, - { - "local_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp", - NULL - }, - { - "local_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p", - NULL - }, - { - "local_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp", - NULL - }, - { - "local_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p", - NULL - }, - { - 
"local_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp", - NULL - }, - { - "local_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p", - NULL - }, - { - "local_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp", - NULL - }, - { - "vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d", - NULL - }, - { - "const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d", - NULL - }, - { - "private_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d", - NULL - }, - 
{ - "private_const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d", - NULL - }, - { - "constant_derived_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep", - NULL - }, - { - "constant_derived_p1", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep", - NULL - }, - { - "constant_derived_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp", - NULL - }, - { - "constant_derived_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp", - NULL - }, - { - "global_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep", - NULL - }, - { - "global_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), 
"typedef_struct_type*", "globaltypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp", - NULL - }, - { - "global_const_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep", - NULL - }, - { - "global_const_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp", - NULL - }, - { - "global_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep", - NULL - }, - { - "global_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", 
"globalvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "global_const_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep", - NULL - }, - { - "global_const_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "local_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep", - NULL - }, - { - "local_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), 
"typedef_struct_type*", "localtypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp", - NULL - }, - { - "local_const_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep", - NULL - }, - { - "local_const_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp", - NULL - }, - { - "local_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep", - NULL - }, - { - "local_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "local_const_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep", 
- (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep", - NULL - }, - { - "local_const_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", 
"localconstvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed", - NULL - }, - { - "const_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", 
"constunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed", - NULL - }, - { - "private_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed", - NULL - }, - { - "private_const_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed", - NULL - }, -}; - -// Support for optional image data type -const char * image_kernel_args[] = { - "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" - "kernel void image_d(read_only image2d_t image2d_td0,\n" - " write_only image2d_t image2d_td1,\n" - " read_only image3d_t image3d_td2,\n" - " write_only image3d_t image3d_td3,\n" - " read_only image2d_array_t image2d_array_td4,\n" - " write_only image2d_array_t image2d_array_td5,\n" - " read_only image1d_t image1d_td6,\n" - " write_only image1d_t image1d_td7,\n" - " read_only image1d_buffer_t image1d_buffer_td8,\n" - " write_only image1d_buffer_t image1d_buffer_td9,\n" - " read_only image1d_array_t image1d_array_td10,\n" - " write_only image1d_array_t image1d_array_td11,\n" - " sampler_t sampler_td12)\n" - "{}\n", - "\n" -}; - -const char * 
image_arg_info[][67] = { - { - "image_d", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", - NULL - }, -}; - -// Support for optional double data type -const char * double_kernel_args[] = { - "kernel void double_scalar_p(constant double*constantdoublep,\n" - " constant double *restrict constantdoublerestrictp,\n" - " global double*globaldoublep,\n" - " global double *restrict globaldoublerestrictp,\n" - " global const double* globalconstdoublep,\n" - " global const double * restrict globalconstdoublerestrictp,\n" - " global volatile double*globalvolatiledoublep,\n" - " global volatile double *restrict globalvolatiledoublerestrictp,\n" - " global const volatile double* globalconstvolatiledoublep)\n" - "{}\n", - "\n" - "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" - " local double*localdoublep,\n" - " local double *restrict localdoublerestrictp,\n" - " local const double* localconstdoublep,\n" - " local const double * restrict localconstdoublerestrictp,\n" - " local volatile double*localvolatiledoublep,\n" - " local volatile double *restrict localvolatiledoublerestrictp,\n" - " local const volatile double* localconstvolatiledoublep,\n" - " local const volatile double * restrict localconstvolatiledoublerestrictp)\n" - "{}\n", - "\n" - "kernel void double_scalar_d(double doubled,\n" - " const double constdoubled,\n" - " private double privatedoubled,\n" - " private const double privateconstdoubled)\n" - "{}\n", - "\n" - "kernel void double_vector2_p(constant double2*constantdouble2p,\n" - " constant double2 *restrict constantdouble2restrictp,\n" - " global double2*globaldouble2p,\n" - " global double2 *restrict globaldouble2restrictp,\n" - " global const double2* globalconstdouble2p,\n" - " global const double2 * restrict globalconstdouble2restrictp,\n" - " global volatile 
double2*globalvolatiledouble2p,\n" - " global volatile double2 *restrict globalvolatiledouble2restrictp,\n" - " global const volatile double2* globalconstvolatiledouble2p)\n" - "{}\n", - "\n" - "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n" - " local double2*localdouble2p,\n" - " local double2 *restrict localdouble2restrictp,\n" - " local const double2* localconstdouble2p,\n" - " local const double2 * restrict localconstdouble2restrictp,\n" - " local volatile double2*localvolatiledouble2p,\n" - " local volatile double2 *restrict localvolatiledouble2restrictp,\n" - " local const volatile double2* localconstvolatiledouble2p,\n" - " local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector2_d(double2 double2d,\n" - " const double2 constdouble2d,\n" - " private double2 privatedouble2d,\n" - " private const double2 privateconstdouble2d)\n" - "{}\n", - "\n" - "kernel void double_vector3_p(constant double3*constantdouble3p,\n" - " constant double3 *restrict constantdouble3restrictp,\n" - " global double3*globaldouble3p,\n" - " global double3 *restrict globaldouble3restrictp,\n" - " global const double3* globalconstdouble3p,\n" - " global const double3 * restrict globalconstdouble3restrictp,\n" - " global volatile double3*globalvolatiledouble3p,\n" - " global volatile double3 *restrict globalvolatiledouble3restrictp,\n" - " global const volatile double3* globalconstvolatiledouble3p)\n" - "{}\n", - "\n" - "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n" - " local double3*localdouble3p,\n" - " local double3 *restrict localdouble3restrictp,\n" - " local const double3* localconstdouble3p,\n" - " local const double3 * restrict localconstdouble3restrictp,\n" - " local volatile double3*localvolatiledouble3p,\n" - " local volatile double3 *restrict localvolatiledouble3restrictp,\n" - " 
local const volatile double3* localconstvolatiledouble3p,\n" - " local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector3_d(double3 double3d,\n" - " const double3 constdouble3d,\n" - " private double3 privatedouble3d,\n" - " private const double3 privateconstdouble3d)\n" - "{}\n", - "\n" - "kernel void double_vector4_p(constant double4*constantdouble4p,\n" - " constant double4 *restrict constantdouble4restrictp,\n" - " global double4*globaldouble4p,\n" - " global double4 *restrict globaldouble4restrictp,\n" - " global const double4* globalconstdouble4p,\n" - " global const double4 * restrict globalconstdouble4restrictp,\n" - " global volatile double4*globalvolatiledouble4p,\n" - " global volatile double4 *restrict globalvolatiledouble4restrictp,\n" - " global const volatile double4* globalconstvolatiledouble4p)\n" - "{}\n", - "\n" - "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n" - " local double4*localdouble4p,\n" - " local double4 *restrict localdouble4restrictp,\n" - " local const double4* localconstdouble4p,\n" - " local const double4 * restrict localconstdouble4restrictp,\n" - " local volatile double4*localvolatiledouble4p,\n" - " local volatile double4 *restrict localvolatiledouble4restrictp,\n" - " local const volatile double4* localconstvolatiledouble4p,\n" - " local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector4_d(double4 double4d,\n" - " const double4 constdouble4d,\n" - " private double4 privatedouble4d,\n" - " private const double4 privateconstdouble4d)\n" - "{}\n", - "\n" - "kernel void double_vector8_p(constant double8*constantdouble8p,\n" - " constant double8 *restrict constantdouble8restrictp,\n" - " global double8*globaldouble8p,\n" - " global double8 *restrict globaldouble8restrictp,\n" - " global const double8* globalconstdouble8p,\n" - " 
global const double8 * restrict globalconstdouble8restrictp,\n" - " global volatile double8*globalvolatiledouble8p,\n" - " global volatile double8 *restrict globalvolatiledouble8restrictp,\n" - " global const volatile double8* globalconstvolatiledouble8p)\n" - "{}\n", - "\n" - "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n" - " local double8*localdouble8p,\n" - " local double8 *restrict localdouble8restrictp,\n" - " local const double8* localconstdouble8p,\n" - " local const double8 * restrict localconstdouble8restrictp,\n" - " local volatile double8*localvolatiledouble8p,\n" - " local volatile double8 *restrict localvolatiledouble8restrictp,\n" - " local const volatile double8* localconstvolatiledouble8p,\n" - " local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector8_d(double8 double8d,\n" - " const double8 constdouble8d,\n" - " private double8 privatedouble8d,\n" - " private const double8 privateconstdouble8d)\n" - "{}\n", - "\n" - "kernel void double_vector16_p(constant double16*constantdouble16p,\n" - " constant double16 *restrict constantdouble16restrictp,\n" - " global double16*globaldouble16p,\n" - " global double16 *restrict globaldouble16restrictp,\n" - " global const double16* globalconstdouble16p,\n" - " global const double16 * restrict globalconstdouble16restrictp,\n" - " global volatile double16*globalvolatiledouble16p,\n" - " global volatile double16 *restrict globalvolatiledouble16restrictp,\n" - " global const volatile double16* globalconstvolatiledouble16p)\n" - "{}\n", - "\n" - "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n" - " local double16*localdouble16p,\n" - " local double16 *restrict localdouble16restrictp,\n" - " local const double16* localconstdouble16p,\n" - " local const double16 * restrict localconstdouble16restrictp,\n" - " local 
volatile double16*localvolatiledouble16p,\n" - " local volatile double16 *restrict localvolatiledouble16restrictp,\n" - " local const volatile double16* localconstvolatiledouble16p,\n" - " local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector16_d(double16 double16d,\n" - " const double16 constdouble16d,\n" - " private double16 privatedouble16d,\n" - " private const double16 privateconstdouble16d)\n" - "{}\n", - "\n" -}; - -// Support for optional 3D image data type -const char * image_3D_kernel_args[] = { - "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" - "kernel void image_d(read_only image3d_t image3d_td2,\n" - " write_only image3d_t image3d_td3)\n" - "{}\n", - "\n" -}; - -const char * image_3D_arg_info[][67] = { - { - "image_d", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", - NULL - }, -}; - -const char * double_arg_info[][77] = { - { - "double_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep", - NULL - }, - { - "double_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp", - NULL - }, - { - "double_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled", - NULL - }, - { - "double_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", 
"globaldouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p", - NULL - }, - { - "double_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp", - NULL - }, - { - "double_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d", - NULL - }, - { - "double_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p", - NULL - }, - { - "double_vector3_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp", - NULL - }, - { - "double_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d", - NULL - }, - { - "double_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p", - NULL - }, - { - "double_vector4_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", 
"localconstdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp", - NULL - }, - { - "double_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d", - NULL - }, - { - "double_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p", - NULL - }, - { - "double_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp", - NULL - }, - { - "double_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d", - NULL - }, - { - "double_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), 
"double16*", "constantdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p", - NULL - }, - { - "double_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp", - NULL - }, - { - "double_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d", - NULL - }, -}; - - 
-// Support for optional half data type -const char * half_kernel_args[] = { - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "\n" - "kernel void half_scalar_p(constant half*constanthalfp,\n" - " constant half *restrict constanthalfrestrictp,\n" - " global half*globalhalfp,\n" - " global half *restrict globalhalfrestrictp,\n" - " global const half* globalconsthalfp,\n" - " global const half * restrict globalconsthalfrestrictp,\n" - " global volatile half*globalvolatilehalfp,\n" - " global volatile half *restrict globalvolatilehalfrestrictp,\n" - " global const volatile half* globalconstvolatilehalfp)\n" - "{}\n", - "\n" - "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n" - " local half*localhalfp,\n" - " local half *restrict localhalfrestrictp,\n" - " local const half* localconsthalfp,\n" - " local const half * restrict localconsthalfrestrictp,\n" - " local volatile half*localvolatilehalfp,\n" - " local volatile half *restrict localvolatilehalfrestrictp,\n" - " local const volatile half* localconstvolatilehalfp,\n" - " local const volatile half * restrict localconstvolatilehalfrestrictp)\n" - "{}\n", - "\n" - "kernel void half_vector2_p(constant half2*constanthalf2p,\n" - " constant half2 *restrict constanthalf2restrictp,\n" - " global half2*globalhalf2p,\n" - " global half2 *restrict globalhalf2restrictp,\n" - " global const half2* globalconsthalf2p,\n" - " global const half2 * restrict globalconsthalf2restrictp,\n" - " global volatile half2*globalvolatilehalf2p,\n" - " global volatile half2 *restrict globalvolatilehalf2restrictp,\n" - " global const volatile half2* globalconstvolatilehalf2p)\n" - "{}\n", - "\n" - "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n" - " local half2*localhalf2p,\n" - " local half2 *restrict localhalf2restrictp,\n" - " local const half2* localconsthalf2p,\n" - " local const half2 * restrict localconsthalf2restrictp,\n" - " 
local volatile half2*localvolatilehalf2p,\n" - " local volatile half2 *restrict localvolatilehalf2restrictp,\n" - " local const volatile half2* localconstvolatilehalf2p,\n" - " local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector3_p(constant half3*constanthalf3p,\n" - " constant half3 *restrict constanthalf3restrictp,\n" - " global half3*globalhalf3p,\n" - " global half3 *restrict globalhalf3restrictp,\n" - " global const half3* globalconsthalf3p,\n" - " global const half3 * restrict globalconsthalf3restrictp,\n" - " global volatile half3*globalvolatilehalf3p,\n" - " global volatile half3 *restrict globalvolatilehalf3restrictp,\n" - " global const volatile half3* globalconstvolatilehalf3p)\n" - "{}\n", - "\n" - "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n" - " local half3*localhalf3p,\n" - " local half3 *restrict localhalf3restrictp,\n" - " local const half3* localconsthalf3p,\n" - " local const half3 * restrict localconsthalf3restrictp,\n" - " local volatile half3*localvolatilehalf3p,\n" - " local volatile half3 *restrict localvolatilehalf3restrictp,\n" - " local const volatile half3* localconstvolatilehalf3p,\n" - " local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector4_p(constant half4*constanthalf4p,\n" - " constant half4 *restrict constanthalf4restrictp,\n" - " global half4*globalhalf4p,\n" - " global half4 *restrict globalhalf4restrictp,\n" - " global const half4* globalconsthalf4p,\n" - " global const half4 * restrict globalconsthalf4restrictp,\n" - " global volatile half4*globalvolatilehalf4p,\n" - " global volatile half4 *restrict globalvolatilehalf4restrictp,\n" - " global const volatile half4* globalconstvolatilehalf4p)\n" - "{}\n", - "\n" - "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n" - " local 
half4*localhalf4p,\n" - " local half4 *restrict localhalf4restrictp,\n" - " local const half4* localconsthalf4p,\n" - " local const half4 * restrict localconsthalf4restrictp,\n" - " local volatile half4*localvolatilehalf4p,\n" - " local volatile half4 *restrict localvolatilehalf4restrictp,\n" - " local const volatile half4* localconstvolatilehalf4p,\n" - " local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector8_p(constant half8*constanthalf8p,\n" - " constant half8 *restrict constanthalf8restrictp,\n" - " global half8*globalhalf8p,\n" - " global half8 *restrict globalhalf8restrictp,\n" - " global const half8* globalconsthalf8p,\n" - " global const half8 * restrict globalconsthalf8restrictp,\n" - " global volatile half8*globalvolatilehalf8p,\n" - " global volatile half8 *restrict globalvolatilehalf8restrictp,\n" - " global const volatile half8* globalconstvolatilehalf8p)\n" - "{}\n", - "\n" - "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n" - " local half8*localhalf8p,\n" - " local half8 *restrict localhalf8restrictp,\n" - " local const half8* localconsthalf8p,\n" - " local const half8 * restrict localconsthalf8restrictp,\n" - " local volatile half8*localvolatilehalf8p,\n" - " local volatile half8 *restrict localvolatilehalf8restrictp,\n" - " local const volatile half8* localconstvolatilehalf8p,\n" - " local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector16_p(constant half16*constanthalf16p,\n" - " constant half16 *restrict constanthalf16restrictp,\n" - " global half16*globalhalf16p,\n" - " global half16 *restrict globalhalf16restrictp,\n" - " global const half16* globalconsthalf16p,\n" - " global const half16 * restrict globalconsthalf16restrictp,\n" - " global volatile half16*globalvolatilehalf16p,\n" - " global volatile half16 *restrict globalvolatilehalf16restrictp,\n" - " global 
const volatile half16* globalconstvolatilehalf16p)\n" - "{}\n", - "\n" - "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n" - " local half16*localhalf16p,\n" - " local half16 *restrict localhalf16restrictp,\n" - " local const half16* localconsthalf16p,\n" - " local const half16 * restrict localconsthalf16restrictp,\n" - " local volatile half16*localvolatilehalf16p,\n" - " local volatile half16 *restrict localvolatilehalf16restrictp,\n" - " local const volatile half16* localconstvolatilehalf16p,\n" - " local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n" - "{}\n", - "\n" -}; - -const char * half_arg_info[][77] = { - { - "half_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp", - NULL - }, - { - "half_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", 
"localconstvolatilehalfrestrictp", - NULL - }, - { - "half_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p", - NULL - }, - { - "half_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp", - NULL - }, - { - "half_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p", - NULL - }, - { - "half_vector3_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp", - NULL - }, - { - "half_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", 
"globalvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p", - NULL - }, - { - "half_vector4_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp", - NULL - }, - { - "half_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p", - NULL - }, - { - "half_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp", - NULL - }, - { - "half_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p", - NULL - }, - { - "half_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p", - 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp", - NULL - }, -}; - -const char * long_kernel_args[] = { - "kernel void constant_scalar_p2(constant long* constantlongp,\n" - " constant ulong * constantulongp)\n" - "{}\n", - "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp)\n" - "{}\n", - "\n" - "kernel void constant_scalar_restrict_p2(constant long*restrict constantlongrestrictp,\n" - " constant ulong *restrict constantulongrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void global_scalar_p(global long* globallongp,\n" - " global ulong * globalulongp,\n" - " global unsigned long*globalunsignedlongp)\n" - "{}\n", - "\n" - "kernel void global_scalar_restrict_p(global long*restrict globallongrestrictp,\n" - " global ulong *restrict globalulongrestrictp,\n" - " global unsigned long* restrict globalunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_scalar_p(global const long* 
globalconstlongp,\n" - " global const ulong * globalconstulongp,\n" - " global const unsigned long*globalconstunsignedlongp)\n" - "{}\n", - "\n" - "kernel void global_const_scalar_restrict_p(global const long*restrict globalconstlongrestrictp,\n" - " global const ulong *restrict globalconstulongrestrictp,\n" - " global const unsigned long* restrict globalconstunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_p(global volatile long* globalvolatilelongp,\n" - " global volatile ulong * globalvolatileulongp,\n" - " global volatile unsigned long*globalvolatileunsignedlongp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_restrict_p(global volatile long*restrict globalvolatilelongrestrictp,\n" - " global volatile ulong *restrict globalvolatileulongrestrictp,\n" - " global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_p(global const volatile long* globalconstvolatilelongp,\n" - " global const volatile ulong * globalconstvolatileulongp,\n" - " global const volatile unsigned long*globalconstvolatileunsignedlongp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_restrict_p(global const volatile long*restrict globalconstvolatilelongrestrictp,\n" - " global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n" - " global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void local_scalar_p(local long* locallongp,\n" - " local ulong * localulongp,\n" - " local unsigned long*localunsignedlongp)\n" - "{}\n", - "\n" - "kernel void local_scalar_restrict_p(local long*restrict locallongrestrictp,\n" - " local ulong *restrict localulongrestrictp,\n" - " local unsigned long* restrict localunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_scalar_p(local const long* localconstlongp,\n" - " local const ulong * localconstulongp,\n" - " local const 
unsigned long*localconstunsignedlongp)\n" - "{}\n", - "\n" - "kernel void local_const_scalar_restrict_p(local const long*restrict localconstlongrestrictp,\n" - " local const ulong *restrict localconstulongrestrictp,\n" - " local const unsigned long* restrict localconstunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_p(local volatile long* localvolatilelongp,\n" - " local volatile ulong * localvolatileulongp,\n" - " local volatile unsigned long*localvolatileunsignedlongp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_restrict_p(local volatile long*restrict localvolatilelongrestrictp,\n" - " local volatile ulong *restrict localvolatileulongrestrictp,\n" - " local volatile unsigned long* restrict localvolatileunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_p(local const volatile long* localconstvolatilelongp,\n" - " local const volatile ulong * localconstvolatileulongp,\n" - " local const volatile unsigned long*localconstvolatileunsignedlongp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_restrict_p(local const volatile long*restrict localconstvolatilelongrestrictp,\n" - " local const volatile ulong *restrict localconstvolatileulongrestrictp,\n" - " local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp)\n" - "{}\n", - "\n" - "kernel void scalar_d(long longd,\n" - " ulong ulongd,\n" - " unsigned long unsignedlongd)\n" - "{}\n", - "\n" - "kernel void const_scalar_d(const long constlongd,\n" - " const ulong constulongd,\n" - " const unsigned long constunsignedlongd)\n" - "{}\n", - "\n" - "kernel void private_scalar_d(private long privatelongd,\n" - " private ulong privateulongd,\n" - " private unsigned long privateunsignedlongd)\n" - "{}\n", - "\n" - "kernel void private_const_scalar_d(private const long privateconstlongd,\n" - " private const ulong privateconstulongd,\n" - " private const unsigned long privateconstunsignedlongd)\n" - "{}\n", - 
"\n" - "kernel void constant_vector2_p1(constant long2* constantlong2p,\n" - " constant ulong2 * constantulong2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p1(constant long2 * restrict constantlong2restrictp,\n" - " constant ulong2*restrict constantulong2restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector2_p(global long2* globallong2p,\n" - " global ulong2 * globalulong2p)\n" - "{}\n", - "\n" - "kernel void global_vector2_restrict_p(global long2 * restrict globallong2restrictp,\n" - " global ulong2*restrict globalulong2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_p(global const long2*globalconstlong2p,\n" - " global const ulong2 *globalconstulong2p)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_restrict_p(global const long2 *restrict globalconstlong2restrictp,\n" - " global const ulong2* restrict globalconstulong2restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_p(global volatile long2* globalvolatilelong2p,\n" - " global volatile ulong2 * globalvolatileulong2p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_restrict_p(global volatile long2 * restrict globalvolatilelong2restrictp,\n" - " global volatile ulong2*restrict globalvolatileulong2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_p(global const volatile long2*globalconstvolatilelong2p,\n" - " global const volatile ulong2 *globalconstvolatileulong2p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_restrict_p(global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n" - " global const volatile ulong2* restrict globalconstvolatileulong2restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector2_p(local long2* locallong2p,\n" - " local ulong2 * localulong2p)\n" - "{}\n", - "\n" - "kernel void local_vector2_restrict_p(local long2 * restrict locallong2restrictp,\n" - " local ulong2*restrict localulong2restrictp)\n" - "{}\n", - "\n" - "kernel void 
local_const_vector2_p(local const long2*localconstlong2p,\n" - " local const ulong2 *localconstulong2p)\n" - "{}\n", - "\n" - "kernel void local_const_vector2_restrict_p(local const long2 *restrict localconstlong2restrictp,\n" - " local const ulong2* restrict localconstulong2restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_p(local volatile long2* localvolatilelong2p,\n" - " local volatile ulong2 * localvolatileulong2p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_restrict_p(local volatile long2 * restrict localvolatilelong2restrictp,\n" - " local volatile ulong2*restrict localvolatileulong2restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_p(local const volatile long2*localconstvolatilelong2p,\n" - " local const volatile ulong2 *localconstvolatileulong2p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_restrict_p(local const volatile long2 *restrict localconstvolatilelong2restrictp,\n" - " local const volatile ulong2* restrict localconstvolatileulong2restrictp)\n" - "{}\n", - "\n" - "kernel void vector2_d(long2 long2d,\n" - " ulong2 ulong2d)\n" - "{}\n", - "\n" - "kernel void const_vector2_d(const long2 constlong2d,\n" - " const ulong2 constulong2d)\n" - "{}\n", - "\n" - "kernel void private_vector2_d(private long2 privatelong2d,\n" - " private ulong2 privateulong2d)\n" - "{}\n", - "\n" - "kernel void private_const_vector2_d(private const long2 privateconstlong2d,\n" - " private const ulong2 privateconstulong2d)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p1(constant long3* constantlong3p,\n" - " constant ulong3 * constantulong3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p1(constant long3 * restrict constantlong3restrictp,\n" - " constant ulong3*restrict constantulong3restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector3_p(global long3* globallong3p,\n" - " global ulong3 * globalulong3p)\n" - "{}\n", - "\n" - "kernel void global_vector3_restrict_p(global 
long3 * restrict globallong3restrictp,\n" - " global ulong3*restrict globalulong3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_p(global const long3*globalconstlong3p,\n" - " global const ulong3 *globalconstulong3p)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_restrict_p(global const long3 *restrict globalconstlong3restrictp,\n" - " global const ulong3* restrict globalconstulong3restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_p(global volatile long3* globalvolatilelong3p,\n" - " global volatile ulong3 * globalvolatileulong3p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_restrict_p(global volatile long3 * restrict globalvolatilelong3restrictp,\n" - " global volatile ulong3*restrict globalvolatileulong3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_p(global const volatile long3*globalconstvolatilelong3p,\n" - " global const volatile ulong3 *globalconstvolatileulong3p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_restrict_p(global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n" - " global const volatile ulong3* restrict globalconstvolatileulong3restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector3_p(local long3* locallong3p,\n" - " local ulong3 * localulong3p)\n" - "{}\n", - "\n" - "kernel void local_vector3_restrict_p(local long3 * restrict locallong3restrictp,\n" - " local ulong3*restrict localulong3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_p(local const long3*localconstlong3p,\n" - " local const ulong3 *localconstulong3p)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_restrict_p(local const long3 *restrict localconstlong3restrictp,\n" - " local const ulong3* restrict localconstulong3restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector3_p(local volatile long3* localvolatilelong3p,\n" - " local volatile ulong3 * localvolatileulong3p)\n" - "{}\n", - "\n" - "kernel void 
local_volatile_vector3_restrict_p(local volatile long3 * restrict localvolatilelong3restrictp,\n" - " local volatile ulong3*restrict localvolatileulong3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_p(local const volatile long3*localconstvolatilelong3p,\n" - " local const volatile ulong3 *localconstvolatileulong3p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_restrict_p(local const volatile long3 *restrict localconstvolatilelong3restrictp,\n" - " local const volatile ulong3* restrict localconstvolatileulong3restrictp)\n" - "{}\n", - "\n" - "kernel void vector3_d(long3 long3d,\n" - " ulong3 ulong3d)\n" - "{}\n", - "\n" - "kernel void const_vector3_d(const long3 constlong3d,\n" - " const ulong3 constulong3d)\n" - "{}\n", - "\n" - "kernel void private_vector3_d(private long3 privatelong3d,\n" - " private ulong3 privateulong3d)\n" - "{}\n", - "\n" - "kernel void private_const_vector3_d(private const long3 privateconstlong3d,\n" - " private const ulong3 privateconstulong3d)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p1(constant long4* constantlong4p,\n" - " constant ulong4 * constantulong4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p1(constant long4 * restrict constantlong4restrictp,\n" - " constant ulong4*restrict constantulong4restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector4_p(global long4* globallong4p,\n" - " global ulong4 * globalulong4p)\n" - "{}\n", - "\n" - "kernel void global_vector4_restrict_p(global long4 * restrict globallong4restrictp,\n" - " global ulong4*restrict globalulong4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_p(global const long4*globalconstlong4p,\n" - " global const ulong4 *globalconstulong4p)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_restrict_p(global const long4 *restrict globalconstlong4restrictp,\n" - " global const ulong4* restrict globalconstulong4restrictp)\n" - "{}\n", - "\n" - "kernel void 
global_volatile_vector4_p(global volatile long4* globalvolatilelong4p,\n" - " global volatile ulong4 * globalvolatileulong4p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector4_restrict_p(global volatile long4 * restrict globalvolatilelong4restrictp,\n" - " global volatile ulong4*restrict globalvolatileulong4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_p(global const volatile long4*globalconstvolatilelong4p,\n" - " global const volatile ulong4 *globalconstvolatileulong4p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_restrict_p(global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n" - " global const volatile ulong4* restrict globalconstvolatileulong4restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector4_p(local long4* locallong4p,\n" - " local ulong4 * localulong4p)\n" - "{}\n", - "\n" - "kernel void local_vector4_restrict_p(local long4 * restrict locallong4restrictp,\n" - " local ulong4*restrict localulong4restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_p(local const long4*localconstlong4p,\n" - " local const ulong4 *localconstulong4p)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_restrict_p(local const long4 *restrict localconstlong4restrictp,\n" - " local const ulong4* restrict localconstulong4restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_p(local volatile long4* localvolatilelong4p,\n" - " local volatile ulong4 * localvolatileulong4p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_restrict_p(local volatile long4 * restrict localvolatilelong4restrictp,\n" - " local volatile ulong4*restrict localvolatileulong4restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector4_p(local const volatile long4*localconstvolatilelong4p,\n" - " local const volatile ulong4 *localconstvolatileulong4p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector4_restrict_p(local const volatile long4 *restrict 
localconstvolatilelong4restrictp,\n" - " local const volatile ulong4* restrict localconstvolatileulong4restrictp)\n" - "{}\n", - "\n" - "kernel void vector4_d(long4 long4d,\n" - " ulong4 ulong4d)\n" - "{}\n", - "\n" - "kernel void const_vector4_d(const long4 constlong4d,\n" - " const ulong4 constulong4d)\n" - "{}\n", - "\n" - "kernel void private_vector4_d(private long4 privatelong4d,\n" - " private ulong4 privateulong4d)\n" - "{}\n", - "\n" - "kernel void private_const_vector4_d(private const long4 privateconstlong4d,\n" - " private const ulong4 privateconstulong4d)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p1(constant long8* constantlong8p,\n" - " constant ulong8 * constantulong8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p1(constant long8 * restrict constantlong8restrictp,\n" - " constant ulong8*restrict constantulong8restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector8_p(global long8* globallong8p,\n" - " global ulong8 * globalulong8p)\n" - "{}\n", - "\n" - "kernel void global_vector8_restrict_p(global long8 * restrict globallong8restrictp,\n" - " global ulong8*restrict globalulong8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_p(global const long8*globalconstlong8p,\n" - " global const ulong8 *globalconstulong8p)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_restrict_p(global const long8 *restrict globalconstlong8restrictp,\n" - " global const ulong8* restrict globalconstulong8restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_p(global volatile long8* globalvolatilelong8p,\n" - " global volatile ulong8 * globalvolatileulong8p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_restrict_p(global volatile long8 * restrict globalvolatilelong8restrictp,\n" - " global volatile ulong8*restrict globalvolatileulong8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_p(global const volatile long8*globalconstvolatilelong8p,\n" - " global const volatile 
ulong8 *globalconstvolatileulong8p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_restrict_p(global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n" - " global const volatile ulong8* restrict globalconstvolatileulong8restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector8_p(local long8* locallong8p,\n" - " local ulong8 * localulong8p)\n" - "{}\n", - "\n" - "kernel void local_vector8_restrict_p(local long8 * restrict locallong8restrictp,\n" - " local ulong8*restrict localulong8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_p(local const long8*localconstlong8p,\n" - " local const ulong8 *localconstulong8p)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_restrict_p(local const long8 *restrict localconstlong8restrictp,\n" - " local const ulong8* restrict localconstulong8restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_p(local volatile long8* localvolatilelong8p,\n" - " local volatile ulong8 * localvolatileulong8p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_restrict_p(local volatile long8 * restrict localvolatilelong8restrictp,\n" - " local volatile ulong8*restrict localvolatileulong8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_p(local const volatile long8*localconstvolatilelong8p,\n" - " local const volatile ulong8 *localconstvolatileulong8p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_restrict_p(local const volatile long8 *restrict localconstvolatilelong8restrictp,\n" - " local const volatile ulong8* restrict localconstvolatileulong8restrictp)\n" - "{}\n", - "\n" - "kernel void vector8_d(long8 long8d,\n" - " ulong8 ulong8d)\n" - "{}\n", - "\n" - "kernel void const_vector8_d(const long8 constlong8d,\n" - " const ulong8 constulong8d)\n" - "{}\n", - "\n" - "kernel void private_vector8_d(private long8 privatelong8d,\n" - " private ulong8 privateulong8d)\n" - "{}\n", - "\n" - "kernel void private_const_vector8_d(private 
const long8 privateconstlong8d,\n" - " private const ulong8 privateconstulong8d)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p1(constant long16* constantlong16p,\n" - " constant ulong16 * constantulong16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p1(constant long16 * restrict constantlong16restrictp,\n" - " constant ulong16*restrict constantulong16restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector16_p(global long16* globallong16p,\n" - " global ulong16 * globalulong16p)\n" - "{}\n", - "\n" - "kernel void global_vector16_restrict_p(global long16 * restrict globallong16restrictp,\n" - " global ulong16*restrict globalulong16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_p(global const long16*globalconstlong16p,\n" - " global const ulong16 *globalconstulong16p)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_restrict_p(global const long16 *restrict globalconstlong16restrictp,\n" - " global const ulong16* restrict globalconstulong16restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_p(global volatile long16* globalvolatilelong16p,\n" - " global volatile ulong16 * globalvolatileulong16p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_restrict_p(global volatile long16 * restrict globalvolatilelong16restrictp,\n" - " global volatile ulong16*restrict globalvolatileulong16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector16_p(global const volatile long16*globalconstvolatilelong16p,\n" - " global const volatile ulong16 *globalconstvolatileulong16p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector16_restrict_p(global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n" - " global const volatile ulong16* restrict globalconstvolatileulong16restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector16_p(local long16* locallong16p,\n" - " local ulong16 * localulong16p)\n" - "{}\n", - "\n" - "kernel void 
local_vector16_restrict_p(local long16 * restrict locallong16restrictp,\n" - " local ulong16*restrict localulong16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_p(local const long16*localconstlong16p,\n" - " local const ulong16 *localconstulong16p)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_restrict_p(local const long16 *restrict localconstlong16restrictp,\n" - " local const ulong16* restrict localconstulong16restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_p(local volatile long16* localvolatilelong16p,\n" - " local volatile ulong16 * localvolatileulong16p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_restrict_p(local volatile long16 * restrict localvolatilelong16restrictp,\n" - " local volatile ulong16*restrict localvolatileulong16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_p(local const volatile long16*localconstvolatilelong16p,\n" - " local const volatile ulong16 *localconstvolatileulong16p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_restrict_p(local const volatile long16 *restrict localconstvolatilelong16restrictp,\n" - " local const volatile ulong16* restrict localconstvolatileulong16restrictp)\n" - "{}\n", - "\n" - "kernel void vector16_d(long16 long16d,\n" - " ulong16 ulong16d)\n" - "{}\n", - "\n" - "kernel void const_vector16_d(const long16 constlong16d,\n" - " const ulong16 constulong16d)\n" - "{}\n", - "\n" - "kernel void private_vector16_d(private long16 privatelong16d,\n" - " private ulong16 privateulong16d)\n" - "{}\n", - "\n" - "kernel void private_const_vector16_d(private const long16 privateconstlong16d,\n" - " private const ulong16 privateconstulong16d)\n" - "{}\n", - "\n" -}; - -const char * long_arg_info[][72] = { - // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 - { - "constant_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp", - NULL - }, - { - "constant_scalar_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp", - NULL - }, - { - "constant_scalar_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp", - NULL - }, - { - "constant_scalar_restrict_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp", - NULL - }, - { - "global_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp", - NULL - }, - { - "global_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp", - NULL - }, - { - "global_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp", - NULL - }, - { - "global_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp", - NULL - }, - { - "global_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp", - NULL - }, - { - "global_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp", - NULL - }, - { - "global_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp", - NULL - }, - { - "global_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp", - NULL - }, - { - "local_scalar_p", - 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp", - NULL - }, - { - "local_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp", - NULL - }, - { - "local_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp", - NULL - }, - { - "local_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp", - NULL - }, - { - "local_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp", - NULL - }, - { - "local_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp", - NULL - }, - { - "local_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", 
"localconstvolatileunsignedlongp", - NULL - }, - { - "local_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp", - NULL - }, - { - "scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd", - NULL - }, - { - "const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd", - NULL - }, - { - "private_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd", - NULL - }, - { - "private_const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd", - NULL - }, - { - "constant_vector2_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p", - NULL - }, - { - "constant_vector2_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp", - NULL - }, - { - "global_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p", - NULL - }, - { 
- "global_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp", - NULL - }, - { - "global_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p", - NULL - }, - { - "global_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp", - NULL - }, - { - "global_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p", - NULL - }, - { - "global_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", 
"globalvolatileulong2restrictp", - NULL - }, - { - "global_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p", - NULL - }, - { - "global_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp", - NULL - }, - { - "local_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p", - NULL - }, - { - "local_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp", - NULL - }, - { - "local_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p", - NULL - }, - { - "local_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp", - NULL - }, - { - "local_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p", - NULL - }, - { - "local_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp", - NULL - }, - { - "local_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p", - NULL - }, - { - "local_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp", - NULL - }, - { - "vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d", - NULL - }, - { - "const_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d", - NULL - }, - { - "private_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d", - NULL - }, - { - "private_const_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d", - NULL - }, - { - "constant_vector3_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p", - NULL - }, - { - "constant_vector3_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp", - NULL - }, - { - "global_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p", - NULL - }, - { - "global_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp", - NULL - }, - { - "global_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p", - NULL - }, - { - "global_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp", - NULL - }, - { - "global_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p", - NULL - }, - { - "global_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp", - NULL - }, - { - "global_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p", - NULL - }, - { - "global_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp", - NULL - }, - { - "local_vector3_p", - 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p", - NULL - }, - { - "local_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp", - NULL - }, - { - "local_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p", - NULL - }, - { - "local_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp", - NULL - }, - { - "local_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p", - NULL - }, - { - "local_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp", - NULL - }, - { - "local_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p", - NULL - }, - { - "local_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp", - NULL - }, - { - "vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d", - NULL - }, - { - "const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d", - NULL - }, - { - "private_vector3_d", - 
(const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d", - NULL - }, - { - "private_const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d", - NULL - }, - { - "constant_vector4_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p", - NULL - }, - { - "constant_vector4_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp", - NULL - }, - { - "global_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p", - NULL - }, - { - "global_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp", - NULL - }, - { - "global_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p", - NULL - }, - { - "global_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp", - NULL - }, - { - "global_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p", - NULL - }, - { - "global_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp", - NULL - }, - { - "global_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p", - NULL - }, - { - "global_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp", - NULL - }, - { - "local_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p", - NULL - }, - { - "local_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp", - NULL - }, - { - "local_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p", - NULL - }, - { - 
"local_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp", - NULL - }, - { - "local_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p", - NULL - }, - { - "local_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp", - NULL - }, - { - "local_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p", - NULL - }, - { - "local_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp", - NULL - }, - { - "vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d", - NULL - }, - { - "const_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d", - NULL - }, - { - "private_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d", - NULL - }, - { - "private_const_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d", - NULL - }, - { - "constant_vector8_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p", - NULL - }, - { - 
"constant_vector8_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp", - NULL - }, - { - "global_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p", - NULL - }, - { - "global_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp", - NULL - }, - { - "global_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p", - NULL - }, - { - "global_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp", - NULL - }, - { - 
"global_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p", - NULL - }, - { - "global_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp", - NULL - }, - { - "global_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p", - NULL - }, - { - "global_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp", - NULL - }, - { - "local_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p", - (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p", - NULL - }, - { - "local_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp", - NULL - }, - { - "local_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p", - NULL - }, - { - "local_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp", - NULL - }, - { - "local_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p", - NULL - }, - { - "local_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp", - NULL - }, - { - "local_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p", - NULL - }, +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "testBase.h" +#include "harness/errorHelpers.h" +#include "harness/typeWrappers.h" +#include "harness/kernelHelpers.h" + +#define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0) + +static constexpr size_t CL_VERSION_LENGTH = 128; +static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128; +static constexpr char KERNEL_ARGUMENT_NAME[] = "argument"; +static constexpr size_t KERNEL_ARGUMENT_NAME_LENGTH = + sizeof(KERNEL_ARGUMENT_NAME) + 1; +static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0; +static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128; + +static const std::vector address_qualifiers = { + CL_KERNEL_ARG_ADDRESS_GLOBAL, CL_KERNEL_ARG_ADDRESS_LOCAL, + CL_KERNEL_ARG_ADDRESS_CONSTANT, CL_KERNEL_ARG_ADDRESS_PRIVATE +}; + +static const std::vector image_arguments = { + "image2d_t", "image3d_t", "image2d_array_t", + "image1d_t", "image1d_buffer_t", "image1d_array_t" +}; + +static const std::vector access_qualifiers = { + CL_KERNEL_ARG_ACCESS_READ_WRITE, CL_KERNEL_ARG_ACCESS_READ_ONLY, + CL_KERNEL_ARG_ACCESS_WRITE_ONLY +}; + +static const std::vector type_qualifiers = { + CL_KERNEL_ARG_TYPE_NONE, + CL_KERNEL_ARG_TYPE_CONST, + CL_KERNEL_ARG_TYPE_VOLATILE, + CL_KERNEL_ARG_TYPE_RESTRICT, + CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE, + CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_RESTRICT, + CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_RESTRICT, + CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE + | CL_KERNEL_ARG_TYPE_RESTRICT, +}; + +static const std::vector pipe_qualifiers = { + CL_KERNEL_ARG_TYPE_PIPE, + CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_PIPE, + CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_PIPE, + CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE + | CL_KERNEL_ARG_TYPE_PIPE, +}; + +static std::string +get_address_qualifier(cl_kernel_arg_address_qualifier address_qualifier) +{ + std::string ret; + if (address_qualifier == CL_KERNEL_ARG_ADDRESS_GLOBAL) + ret = "global"; + else if (address_qualifier == 
CL_KERNEL_ARG_ADDRESS_CONSTANT) + ret = "constant"; + else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) + ret = "local"; + else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE) + ret = "private"; + return ret; +} + +static std::string +get_access_qualifier(cl_kernel_arg_access_qualifier qualifier) +{ + std::string ret; + if (qualifier == CL_KERNEL_ARG_ACCESS_READ_ONLY) ret = "read_only"; + if (qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY) ret = "write_only"; + if (qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE) ret = "read_write"; + return ret; +} + +static std::string +get_type_qualifier_prefix(cl_kernel_arg_type_qualifier type_qualifier) +{ + std::string ret; + if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "const "; + if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "volatile "; + if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "pipe "; + return ret; +} + +static std::string +get_type_qualifier_postfix(cl_kernel_arg_type_qualifier type_qualifier) +{ + std::string ret; + if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret = "restrict"; + return ret; +} + +class KernelArgInfo { +public: + KernelArgInfo(cl_kernel_arg_address_qualifier input_address_qualifier, + cl_kernel_arg_access_qualifier input_access_qualifier, + cl_kernel_arg_type_qualifier input_type_qualifier, + const std::string& input_arg_type, const int argument_number, + const std::string& input_arg_string = "") + : address_qualifier(input_address_qualifier), + access_qualifier(input_access_qualifier), + type_qualifier(input_type_qualifier), arg_string(input_arg_string) + { + strcpy(arg_type, input_arg_type.c_str()); + std::string input_arg_name = + KERNEL_ARGUMENT_NAME + std::to_string(argument_number); + strcpy(arg_name, input_arg_name.c_str()); + }; + KernelArgInfo() = default; + cl_kernel_arg_address_qualifier address_qualifier; + cl_kernel_arg_access_qualifier access_qualifier; + cl_kernel_arg_type_qualifier type_qualifier; + char arg_type[KERNEL_ARGUMENT_LENGTH]; 
+ char arg_name[KERNEL_ARGUMENT_LENGTH]; + std::string arg_string; +}; + +static std::string generate_argument(const KernelArgInfo& kernel_arg) +{ + std::string ret; + + const bool is_image = strstr(kernel_arg.arg_type, "image") + || strstr(kernel_arg.arg_type, "sampler"); + std::string address_qualifier = ""; + // Image Objects are always allocated from the global address space so the + // qualifier should not be specified + if (!is_image) { - "local_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp", - NULL - }, + address_qualifier = get_address_qualifier(kernel_arg.address_qualifier); + } + + std::string access_qualifier = + get_access_qualifier(kernel_arg.access_qualifier); + std::string type_qualifier_prefix = + get_type_qualifier_prefix(kernel_arg.type_qualifier); + std::string type_qualifier_postfix = + get_type_qualifier_postfix(kernel_arg.type_qualifier); + + ret += address_qualifier + " "; + ret += access_qualifier + " "; + ret += type_qualifier_prefix + " "; + ret += kernel_arg.arg_type; + ret += " "; + ret += type_qualifier_postfix + " "; + ret += kernel_arg.arg_name; + return ret; +} + +/* This function generates a kernel source and allows for multiple arguments to + * be passed in and subsequently queried. 
*/ +static std::string generate_kernel(const std::vector& all_args, + const bool supports_3d_image_writes = false, + const bool kernel_uses_half_type = false) +{ + + std::string ret; + if (supports_3d_image_writes) { - "vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d", - NULL - }, + ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n"; + } + if (kernel_uses_half_type) { - "const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d", - NULL - }, + ret += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + } + ret += "kernel void get_kernel_arg_info(\n"; + for (int i = 0; i < all_args.size(); ++i) { - "private_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d", - NULL - }, + const KernelArgInfo& arg = all_args[i]; + ret += generate_argument(all_args[i]); + if (i == all_args.size() - 1) + { + ret += "\n"; + } + else + { + ret += ",\n"; + } + } + ret += "){}"; + return ret; +} + +static const char* get_kernel_arg_address_qualifier( + cl_kernel_arg_address_qualifier address_qualifier) +{ + switch (address_qualifier) { - "private_const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"long8", "privateconstlong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d", - NULL - }, + case CL_KERNEL_ARG_ADDRESS_GLOBAL: { + return "GLOBAL"; + } + case CL_KERNEL_ARG_ADDRESS_LOCAL: { + return "LOCAL"; + } + case CL_KERNEL_ARG_ADDRESS_CONSTANT: { + return "CONSTANT"; + } + default: { + return "PRIVATE"; + } + } +} + +static const char* +get_kernel_arg_access_qualifier(cl_kernel_arg_access_qualifier access_qualifier) +{ + switch (access_qualifier) { - "constant_vector16_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p", - NULL - }, + case CL_KERNEL_ARG_ACCESS_READ_ONLY: { + return "READ_ONLY"; + } + case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: { + return "WRITE_ONLY"; + } + case CL_KERNEL_ARG_ACCESS_READ_WRITE: { + return "READ_WRITE"; + } + default: { + return "NONE"; + } + } +} + +std::string +get_kernel_arg_type_qualifier(cl_kernel_arg_type_qualifier type_qualifier) +{ + std::string ret; + + if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "CONST "; + if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret += "RESTRICT "; + if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "VOLATILE "; + if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "PIPE"; + + return ret; +} + +static void output_difference(const KernelArgInfo& expected, + const KernelArgInfo& actual) +{ + if (actual.address_qualifier != expected.address_qualifier) { - "constant_vector16_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp", - NULL - }, + log_error("Address Qualifier: Expected: %s\t Actual: %s\n", + get_kernel_arg_address_qualifier(expected.address_qualifier), + get_kernel_arg_address_qualifier(actual.address_qualifier)); + } + if (actual.access_qualifier != expected.access_qualifier) { - "global_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p", - NULL - }, + log_error("Access Qualifier: Expected: %s\t Actual: %s\n", + get_kernel_arg_access_qualifier(expected.access_qualifier), + get_kernel_arg_access_qualifier(actual.access_qualifier)); + } + if (actual.type_qualifier != expected.type_qualifier) { - "global_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp", - NULL - }, + log_error( + "Type Qualifier: Expected: %s\t Actual: %s\n", + get_kernel_arg_type_qualifier(expected.type_qualifier).c_str(), + get_kernel_arg_type_qualifier(actual.type_qualifier).c_str()); + } + if (strcmp(actual.arg_type, expected.arg_type) != 0) { - "global_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p", - NULL - }, + log_error("Arg Type: Expected: %s\t Actual: %s\n", expected.arg_type, + actual.arg_type); + } + if (strcmp(actual.arg_name, expected.arg_name) != 0) { - "global_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp", - NULL - }, + log_error("Arg Name: Expected: %s\t Actual: %s\n", expected.arg_name, + actual.arg_name); + } + log_error("Argument in Kernel Source Reported as:\n%s\n", + expected.arg_string.c_str()); +} +static int compare_expected_actual(const KernelArgInfo& expected, + const KernelArgInfo& actual) +{ + ++gTestCount; + int ret = TEST_PASS; + if ((actual.address_qualifier != expected.address_qualifier) + || (actual.access_qualifier != expected.access_qualifier) + || (actual.type_qualifier != expected.type_qualifier) + || (strcmp(actual.arg_type, expected.arg_type) != 0) + || (strcmp(actual.arg_name, expected.arg_name) != 0)) + { + ret = TEST_FAIL; + output_difference(expected, actual); + ++gFailCount; + } + return ret; +} + +static bool device_supports_pipes(cl_device_id deviceID) +{ + auto version = get_device_cl_version(deviceID); + if (version < MINIMUM_OPENCL_PIPE_VERSION) { - "global_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p", - NULL - }, + return false; + } + cl_uint max_packet_size = 0; + cl_int 
err = + clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE, + sizeof(max_packet_size), &max_packet_size, nullptr); + test_error_ret(err, "clGetDeviceInfo", false); + if ((max_packet_size == 0) && (version >= Version(3, 0))) + { + return false; + } + return true; +} + +static std::string get_build_options(cl_device_id deviceID) +{ + std::string ret = "-cl-kernel-arg-info"; + if (get_device_cl_version(deviceID) >= MINIMUM_OPENCL_PIPE_VERSION) { - "global_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp", - NULL - }, + if (device_supports_pipes(deviceID)) + { + if (get_device_cl_version(deviceID) >= Version(3, 0)) + { + ret += " -cl-std=CL3.0"; + } + else + { + ret += " -cl-std=CL2.0"; + } + } + } + return ret; +} + +static std::string get_expected_arg_type(const std::string& type_string, + const bool is_pointer) +{ + bool is_unsigned = false; + std::istringstream type_stream(type_string); + std::string base_type = ""; + std::string ret = ""; + /* Signed and Unsigned on their own represent an int */ + if (type_string == "signed" || type_string == "signed*") + { + base_type = "int"; + } + else if (type_string == "unsigned" || type_string == "unsigned*") { - "global_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", 
"globalconstvolatileulong16p", - NULL - }, + base_type = "int"; + is_unsigned = true; + } + else { - "global_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp", - NULL - }, + std::string token; + /* Iterate through the argument type to determine what the type is and + * whether or not it is signed */ + while (std::getline(type_stream, token, ' ')) + { + if (token.find("unsigned") != std::string::npos) + { + is_unsigned = true; + } + if (token.find("signed") == std::string::npos) + { + base_type = token; + } + } + } + ret = base_type; + if (is_unsigned) { - "local_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p", - NULL - }, + ret.insert(0, "u"); + } + /* Ensure that the data type is a pointer if it is not already when + * necessary */ + if (is_pointer && ret.back() != '*') { - "local_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp", - NULL - }, + ret += "*"; + } + return ret; +} + +static KernelArgInfo +create_expected_arg_info(const KernelArgInfo& 
kernel_argument, bool is_pointer) +{ + KernelArgInfo ret = kernel_argument; + const std::string arg_string = generate_argument(kernel_argument); + ret.arg_string = arg_string; + + std::string type_string(kernel_argument.arg_type); + /* We only need to modify the expected return values for scalar types */ + if ((is_pointer && !isdigit(type_string.back() - 1)) + || !isdigit(type_string.back())) + { + std::string expected_arg_type = + get_expected_arg_type(type_string, is_pointer); + + /* Reset the Contents of expected arg_type char[] and then assign it to + * the expected value */ + memset(ret.arg_type, 0, sizeof(ret.arg_type)); + strcpy(ret.arg_type, expected_arg_type.c_str()); + } + + /* Any values passed by reference has TYPE_NONE */ + if (!is_pointer) { - "local_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p", - NULL - }, + ret.type_qualifier = CL_KERNEL_ARG_TYPE_NONE; + } + + /* If the address qualifier is CONSTANT we expect to see the TYPE_CONST + * qualifier*/ + if (kernel_argument.address_qualifier == CL_KERNEL_ARG_ADDRESS_CONSTANT) { - "local_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp", - NULL - }, + ret.type_qualifier |= CL_KERNEL_ARG_TYPE_CONST; + } + + /* The PIPE qualifier is special. It can only be used in a global scope. 
It + * also ignores any other qualifiers */ + if (kernel_argument.type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) { - "local_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p", - NULL - }, + ret.address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL; + ret.type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; + } + + return ret; +} + +/* There are too many vector arguments for it to be worth writing down + * statically and are instead generated here and combined with all of the scalar + * and unsigned scalar types in a single data structure */ +static std::vector +generate_all_type_arguments(cl_device_id deviceID) +{ + std::vector ret = { + "char", "short", "int", "float", + "void", "uchar", "unsigned char", "ushort", + "unsigned short", "uint", "unsigned int", "char unsigned", + "short unsigned", "int unsigned", "signed short", "signed int", + "signed long", "short signed", "int signed", "signed", + "unsigned" + }; + + std::vector vector_types = { "char", "uchar", "short", + "ushort", "int", "uint", + "float" }; + if (gHasLong) + { + ret.push_back("long"); + ret.push_back("ulong"); + ret.push_back("unsigned long"); + ret.push_back("long unsigned"); + ret.push_back("long signed"); + vector_types.push_back("long"); + vector_types.push_back("ulong"); + } + if (device_supports_half(deviceID)) { - "local_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"ulong16*", "localvolatileulong16restrictp", - NULL - }, + vector_types.push_back("half"); + } + if (device_supports_double(deviceID)) { - "local_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p", - NULL - }, + vector_types.push_back("double"); + } + static const std::vector vector_values = { "2", "3", "4", "8", + "16" }; + for (auto vector_type : vector_types) { - "local_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp", - NULL - }, + for (auto vector_value : vector_values) + { + ret.push_back(vector_type + vector_value); + } + } + return ret; +} + +static int +compare_kernel_with_expected(cl_context context, cl_device_id deviceID, + const char* kernel_src, + const std::vector& expected_args) +{ + int failed_tests = 0; + clKernelWrapper kernel; + clProgramWrapper program; + cl_int err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", + get_build_options(deviceID).c_str()); + test_error(err, "create_single_kernel_helper_with_build_options"); + for (int i = 0; i < expected_args.size(); ++i) + { + KernelArgInfo actual; + err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER, + 
sizeof(actual.address_qualifier), + &(actual.address_qualifier), nullptr); + test_error(err, "clGetKernelArgInfo"); + + err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ACCESS_QUALIFIER, + sizeof(actual.access_qualifier), + &(actual.access_qualifier), nullptr); + test_error(err, "clGetKernelArgInfo"); + + err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_QUALIFIER, + sizeof(actual.type_qualifier), + &(actual.type_qualifier), nullptr); + test_error(err, "clGetKernelArgInfo"); + + err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, + sizeof(actual.arg_type), &(actual.arg_type), + nullptr); + test_error(err, "clGetKernelArgInfo"); + + err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, + sizeof(actual.arg_name), &(actual.arg_name), + nullptr); + test_error(err, "clGetKernelArgInfo"); + + failed_tests += compare_expected_actual(expected_args[i], actual); + } + return failed_tests; +} + +size_t get_param_size(const std::string& arg_type, cl_device_id deviceID, + bool is_pipe) +{ + if (is_pipe) { - "vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d", - NULL - }, + return (sizeof(int*)); + } + if (arg_type.find("*") != std::string::npos) { - "const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d", - NULL - }, + cl_uint device_address_bits = 0; + cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, + sizeof(device_address_bits), + &device_address_bits, NULL); + return (device_address_bits / 8); + } + + size_t ret(0); 
+ if (arg_type.find("char") != std::string::npos) { - "private_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d", - NULL - }, + ret += sizeof(cl_char); + } + if (arg_type.find("short") != std::string::npos) { - "private_const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d", - NULL - }, -}; - -template -int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) { - - const size_t max_name_len = 512; - cl_char name[ max_name_len ]; - cl_uint arg_count, numArgs; - size_t i, j, size; - int error; - - clProgramWrapper program = - clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error); - if ( program == NULL || error != CL_SUCCESS ) + ret += sizeof(cl_short); + } + if (arg_type.find("half") != std::string::npos) { - print_error( error, "Unable to create required arguments kernel program" ); - return -1; + ret += sizeof(cl_half); } - - // Compile the program - log_info( "Building kernels...\n" ); - clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL ); - - // check for build errors and exit if things didn't work - size_t size_ret; - cl_build_status build_status; - error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret); - test_error( error, "Unable to query build status" ); - if (build_status == CL_BUILD_ERROR) { - 
printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); - error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); - test_error( error, "Unable to get build log size" ); - char *build_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); - test_error( error, "Unable to get build log" ); - printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log); - printf("CL_BUILD_ERROR. exiting\n"); - free(build_log); - return -1; + if (arg_type.find("int") != std::string::npos) + { + ret += sizeof(cl_int); } - - // Lookup the number of kernels in the program. - log_info( "Testing kernels...\n" ); - size_t total_kernels = 0; - error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL ); - test_error( error, "Unable to get program info num kernels" ); - - if ( total_kernels != total_kernels_in_program ) + if (arg_type.find("long") != std::string::npos) { - print_error( error, "Program did not build all kernels" ); - return -1; + ret += sizeof(cl_long); } - - // Lookup the kernel names. - size_t kernel_names_len = 0; - error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len ); - test_error( error, "Unable to get length of kernel names list." 
); - - size_t expected_kernel_names_len = 0; - for ( i = 0; i < total_kernels; ++i ) + if (arg_type.find("float") != std::string::npos) { - expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] ); + ret += sizeof(cl_float); } - if ( kernel_names_len != expected_kernel_names_len ) + if (arg_type.find("double") != std::string::npos) { - log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len ); - return -1; + ret += sizeof(cl_double); } - - const size_t len = ( kernel_names_len + 1 ) * sizeof( char ); - char* kernel_names = (char*) malloc( len ); - error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len ); - test_error( error, "Unable to get kernel names list." ); - - // Check to see if the kernel name array is null terminated. - if ( kernel_names[ kernel_names_len - 1 ] != '\0' ) + if (arg_type.back() == '2') { - free( kernel_names ); - print_error( error, "Kernel name list was not null terminated" ); - return -1; + ret *= 2; } - - // Check to see if the correct kernel name string was returned. - // Does the string contain each expected kernel name? - for ( i = 0; i < total_kernels; ++i ) - if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) ) - break; - if ( i != total_kernels ) + if (arg_type.back() == '3') { - log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] ); - free( kernel_names ); - return -1; + ret *= 4; } - - // Are the kernel names delimited by ';'? 
- if ( !strtok( kernel_names, ";" ) ) + if (arg_type.back() == '4') { - error = -1; + ret *= 4; } - else + if (arg_type.back() == '8') { - for ( i = 1; i < total_kernels; ++i ) - { - if ( !strtok( NULL, ";" ) ) - { - error = -1; - } - } + ret *= 8; } - if ( error ) + // If the last character is a 6 it represents a vector of 16 + if (arg_type.back() == '6') { - log_error( "Kernel names string was not properly delimited by ';'\n" ); - free( kernel_names ); - return -1; + ret *= 16; } - free( kernel_names ); + return ret; +} - // Create kernel objects and query them. - int rc = 0; - for ( i = 0; i < total_kernels; ++i ) - { - int kernel_rc = 0; - const char* kernel_name = arg_info[ i ][ 0 ]; - clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error); - if( kernel == NULL || error != CL_SUCCESS ) - { - log_error( "ERROR: Could not get kernel: %s\n", kernel_name ); - kernel_rc = -1; - } +static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID) +{ + int failed_tests = 0; - if(kernel_rc == 0) - { - // Determine the expected number of arguments. - arg_count = 0; - while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL) - ++arg_count; + std::vector type_arguments = + generate_all_type_arguments(deviceID); - // Try to get the number of arguments. 
- error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size ); - test_error( error, "Unable to get kernel arg count param size" ); - if( size != sizeof( numArgs ) ) - { - log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name ); - kernel_rc = -1; - } - } + const std::vector access_qualifiers = { + CL_KERNEL_ARG_ACCESS_NONE, CL_KERNEL_ARG_ACCESS_READ_ONLY, + CL_KERNEL_ARG_ACCESS_WRITE_ONLY + }; + std::vector all_args, expected_args; + size_t max_param_size = get_max_param_size(deviceID); + size_t total_param_size(0); + for (auto address_qualifier : address_qualifiers) + { + bool is_private = (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE); - if(kernel_rc == 0) - { - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); - test_error( error, "Unable to get kernel arg count" ); - if( numArgs != arg_count ) - { - log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name ); - kernel_rc = -1; - } - } + /* OpenCL kernels cannot take "private" pointers and only "private" + * variables can take values */ + bool is_pointer = !is_private; - if(kernel_rc == 0) + for (auto type_qualifier : type_qualifiers) { - for ( j = 0; j < numArgs; ++j ) + bool is_pipe = (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE); + bool is_restrict = (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT); + + for (auto access_qualifier : access_qualifiers) { + bool has_access_qualifier = + (access_qualifier != CL_KERNEL_ARG_ACCESS_NONE); - int arg_rc = 0; - cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ]; - cl_kernel_arg_access_qualifier expected_access_qualifier = (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ]; 
- cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ]; - const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ]; - const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ]; + /*Only images and pipes can have an access qualifier, + * otherwise it should be ACCESS_NONE */ + if (!is_pipe && has_access_qualifier) + { + continue; + } - // Try to get the address qualifier of each argument. - cl_kernel_arg_address_qualifier address_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size ); - test_error( error, "Unable to get argument address qualifier" ); - error = (address_qualifier != expected_address_qualifier); - if ( error ) + /* If the type is a pipe, then either the specified or + * default access qualifier is returned and so "NONE" will + * never be returned */ + if (is_pipe && !has_access_qualifier) { - log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier ); - arg_rc = -1; + continue; } - // Try to get the access qualifier of each argument. 
- cl_kernel_arg_access_qualifier access_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size ); - test_error( error, "Unable to get argument access qualifier" ); - error = (access_qualifier != expected_access_qualifier); - if ( error ) + /* The "restrict" type qualifier can only apply to + * pointers + */ + if (is_restrict && !is_pointer) { - log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier ); - arg_rc = -1; + continue; } - // Try to get the type qualifier of each argument. - cl_kernel_arg_type_qualifier arg_type_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size ); - test_error( error, "Unable to get argument type qualifier" ); - error = (arg_type_qualifier != expected_type_qualifier); - if ( error ) + /* We cannot have pipe pointers */ + if (is_pipe && is_pointer) { - log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier ); - arg_rc = -1; + continue; } - // Try to get the type of each argument. 
- memset( name, 0, max_name_len ); - error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size ); - test_error( error, "Unable to get argument type name" ); - error = strcmp( (const char*) name, expected_type_name ); - if ( error ) + + for (auto arg_type : type_arguments) { - log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name ); - arg_rc = -1; + /* Void Types cannot be private */ + if (is_private && arg_type == "void") + { + continue; + } + + if (is_pointer) + { + arg_type += "*"; + } + size_t param_size = + get_param_size(arg_type, deviceID, is_pipe); + if (param_size + total_param_size >= max_param_size + || all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS) + { + const std::string kernel_src = generate_kernel( + all_args, false, device_supports_half(deviceID)); + failed_tests += compare_kernel_with_expected( + context, deviceID, kernel_src.c_str(), + expected_args); + all_args.clear(); + expected_args.clear(); + total_param_size = 0; + } + total_param_size += param_size; + + KernelArgInfo kernel_argument( + address_qualifier, access_qualifier, type_qualifier, + arg_type, all_args.size()); + + expected_args.push_back( + create_expected_arg_info(kernel_argument, is_pointer)); + + all_args.push_back(kernel_argument); } + } + } + } + const std::string kernel_src = + generate_kernel(all_args, false, device_supports_half(deviceID)); + failed_tests += compare_kernel_with_expected( + context, deviceID, kernel_src.c_str(), expected_args); + return failed_tests; +} + +static cl_uint get_max_number_of_pipes(cl_device_id deviceID, cl_int& err) +{ + cl_uint ret(0); + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(ret), &ret, + nullptr); + return ret; +} - // Try to get the name of each argument. 
- memset( name, 0, max_name_len ); - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size ); - test_error( error, "Unable to get argument name" ); - error = strcmp( (const char*) name, expected_arg_name ); - if ( error ) +static int run_pipe_tests(cl_context context, cl_device_id deviceID) +{ + int failed_tests = 0; + + cl_kernel_arg_address_qualifier address_qualifier = + CL_KERNEL_ARG_ADDRESS_PRIVATE; + std::vector type_arguments = + generate_all_type_arguments(deviceID); + const std::vector access_qualifiers = { + CL_KERNEL_ARG_ACCESS_READ_ONLY, CL_KERNEL_ARG_ACCESS_WRITE_ONLY + }; + std::vector all_args, expected_args; + size_t max_param_size = get_max_param_size(deviceID); + size_t total_param_size(0); + cl_int err = CL_SUCCESS; + cl_uint max_number_of_pipes = get_max_number_of_pipes(deviceID, err); + test_error_ret(err, "get_max_number_of_pipes", TEST_FAIL); + cl_uint number_of_pipes(0); + + const bool is_pointer = false; + const bool is_pipe = true; + + for (auto type_qualifier : pipe_qualifiers) + { + for (auto access_qualifier : access_qualifiers) + { + for (auto arg_type : type_arguments) + { + /* We cannot have void pipes */ + if (arg_type == "void") { - log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name ); - arg_rc = -1; + continue; } - if(arg_rc != 0) { - kernel_rc = -1; + size_t param_size = get_param_size(arg_type, deviceID, is_pipe); + if (param_size + total_param_size >= max_param_size + || number_of_pipes == max_number_of_pipes) + { + const std::string kernel_src = generate_kernel(all_args); + failed_tests += compare_kernel_with_expected( + context, deviceID, kernel_src.c_str(), expected_args); + all_args.clear(); + expected_args.clear(); + total_param_size = 0; + number_of_pipes = 0; } - } - } + total_param_size += param_size; + number_of_pipes++; - //log_info( "%s ... 
%s\n",arg_info[i][0],kernel_rc == 0 ? "passed" : "failed" ); - if(kernel_rc != 0) { - rc = -1; + KernelArgInfo kernel_argument(address_qualifier, + access_qualifier, type_qualifier, + arg_type, all_args.size()); + + expected_args.push_back( + create_expected_arg_info(kernel_argument, is_pointer)); + + all_args.push_back(kernel_argument); + } } } - return rc; + const std::string kernel_src = generate_kernel(all_args); + failed_tests += compare_kernel_with_expected( + context, deviceID, kernel_src.c_str(), expected_args); + return failed_tests; } - -int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +static int run_sampler_test(cl_context context, cl_device_id deviceID) { - size_t size; - int error; + cl_kernel_arg_address_qualifier address_qualifier = + CL_KERNEL_ARG_ADDRESS_PRIVATE; + cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE; + cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE; + std::string image_type = "sampler_t"; + bool is_pointer = false; + + KernelArgInfo kernel_argument(address_qualifier, access_qualifier, + type_qualifier, image_type, + SINGLE_KERNEL_ARG_NUMBER); - cl_bool supports_double = 0; // assume not - cl_bool supports_half = 0; // assume not - cl_bool supports_images = 0; // assume not - cl_bool supports_long = 0; // assume not - cl_bool supports_3D_images = 0; // assume not + KernelArgInfo expected = + create_expected_arg_info(kernel_argument, is_pointer); - // Check if this device supports images - error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + const std::string kernel_src = generate_kernel({ kernel_argument }); - if (supports_images) { - log_info(" o Device supports images\n"); - log_info(" o Expecting SUCCESS when testing image kernel arguments.\n"); - } - else { - log_info(" o Device lacks 
image support\n"); - log_info(" o Not testing image kernel arguments.\n"); - } + return compare_kernel_with_expected(context, deviceID, kernel_src.c_str(), + { expected }); +} - if (is_extension_available(deviceID, "cl_khr_fp64")) { - log_info(" o Device claims extension 'cl_khr_fp64'\n"); - log_info(" o Expecting SUCCESS when testing double kernel arguments.\n"); - supports_double = 1; - } else { - cl_device_fp_config double_fp_config; - error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); - if (double_fp_config != 0) - supports_double = 1; - else { - log_info(" o Device lacks extension 'cl_khr_fp64'\n"); - log_info(" o Not testing double kernel arguments.\n"); - supports_double = 0; +static int run_image_tests(cl_context context, cl_device_id deviceID) +{ + int failed_tests = 0; + bool supports_3d_image_writes = + is_extension_available(deviceID, "cl_khr_3d_image_writes"); + bool is_pointer = false; + cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE; + cl_kernel_arg_address_qualifier address_qualifier = + CL_KERNEL_ARG_ADDRESS_GLOBAL; + + for (auto access_qualifier : access_qualifiers) + { + bool is_write = + (access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY + || access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE); + for (auto image_type : image_arguments) + { + bool is_3d_image = image_type == "image3d_t"; + /* We can only test 3d image writes if our device supports it */ + if (is_3d_image && is_write) + { + if (!supports_3d_image_writes) + { + continue; + } + } + KernelArgInfo kernel_argument(address_qualifier, access_qualifier, + type_qualifier, image_type, + SINGLE_KERNEL_ARG_NUMBER); + KernelArgInfo expected = + create_expected_arg_info(kernel_argument, is_pointer); + const std::string kernel_src = + generate_kernel({ kernel_argument }, supports_3d_image_writes); + + failed_tests += 
compare_kernel_with_expected( + context, deviceID, kernel_src.c_str(), { expected }); } } + failed_tests += run_sampler_test(context, deviceID); + return failed_tests; +} - if (is_extension_available(deviceID, "cl_khr_fp16")) { - log_info(" o Device claims extension 'cl_khr_fp16'\n"); - log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n"); - supports_half = 1; - } else { - log_info(" o Device lacks extension 'cl_khr_fp16'\n"); - log_info(" o Not testing halfn* kernel arguments.\n"); - supports_half = 0; - } +/* Ensure clGetKernelArgInfo returns successfully when param_value is + * set to null */ +static int test_null_param(cl_context context, cl_device_id deviceID, + char const* kernel_src) +{ + clProgramWrapper program; + clKernelWrapper kernel; + cl_int err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", + get_build_options(deviceID).c_str()); + test_error_ret(err, "create_single_kernel_helper_with_build_options", + TEST_FAIL); + + err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, + CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr, + nullptr); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + + err = + clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, + CL_KERNEL_ARG_ACCESS_QUALIFIER, 0, nullptr, nullptr); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + + err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, + CL_KERNEL_ARG_TYPE_QUALIFIER, 0, nullptr, nullptr); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + + err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, + CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, nullptr); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + + err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, + CL_KERNEL_ARG_NAME, 0, nullptr, nullptr); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + + return TEST_PASS; +} - if (is_extension_available(deviceID, "cl_khr_int64")) - { - log_info(" o 
Device claims extension 'cl_khr_int64'\n"); - log_info(" o Expecting SUCCESS when testing long kernel arguments.\n"); - supports_long = 1; - } else +/* Ensure clGetKernelArgInfo returns the correct size in bytes for the + * kernel arg name */ +static int test_arg_name_size(cl_context context, cl_device_id deviceID, + char const* kernel_src) +{ + size_t size; + /* We are adding +1 because the argument used in this kernel is argument0 + * which has 1 extra character than just the base argument name */ + char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1]; + clProgramWrapper program; + clKernelWrapper kernel; + cl_int err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", + get_build_options(deviceID).c_str()); + + test_error_ret(err, "create_single_kernel_helper_with_build_options", + TEST_FAIL); + + err = + clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME, + sizeof(arg_return), &arg_return, &size); + test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL); + if (size == sizeof(KERNEL_ARGUMENT_NAME) + 1) + { + return TEST_PASS; + } + else { - log_info(" o Device lacks extension 'cl_khr_int64'\n"); - log_info(" o Not testing long kernel arguments.\n"); - supports_long = 0; + return TEST_FAIL; } +} - error = checkFor3DImageSupport(deviceID); - if (error != CL_IMAGE_FORMAT_NOT_SUPPORTED) - { - log_info(" o Device supports 3D images\n"); - log_info(" o Expecting SUCCESS when testing 3D image kernel arguments.\n"); - supports_3D_images = 1; - } else +static int run_boundary_tests(cl_context context, cl_device_id deviceID) +{ + int failed_tests = 0; + + cl_kernel_arg_address_qualifier address_qualifier = + CL_KERNEL_ARG_ADDRESS_GLOBAL; + cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE; + cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE; + std::string arg_type = "int*"; + KernelArgInfo arg_info(address_qualifier, access_qualifier, 
type_qualifier, + arg_type, SINGLE_KERNEL_ARG_NUMBER); + const std::string kernel_src = generate_kernel({ arg_info }); + + failed_tests += test_arg_name_size(context, deviceID, kernel_src.c_str()); + + if (test_null_param(context, deviceID, kernel_src.c_str()) != TEST_PASS) { - log_info(" o Device lacks 3D image support\n"); - log_info(" o Not testing 3D image kernel arguments.\n"); - supports_3D_images = 0; + failed_tests++; } - int test_failed = 0; + return failed_tests; +} - // Now create a test program using required arguments - log_info("Testing required kernel arguments...\n"); - error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0])); - test_failed = (error) ? -1 : test_failed; +static int run_all_tests(cl_context context, cl_device_id deviceID) +{ - if ( supports_images ) + int failed_scalar_tests = run_scalar_vector_tests(context, deviceID); + if (failed_scalar_tests == 0) { - log_info("Testing optional image arguments...\n"); - error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0])); - test_failed = (error) ? -1 : test_failed; + log_info("All Data Type Tests Passed\n"); } - - if ( supports_double ) + else { - log_info("Testing optional double arguments...\n"); - error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0])); - test_failed = (error) ? 
-1 : test_failed; + log_error("%d Data Type Test(s) Failed\n", failed_scalar_tests); } - if ( supports_half ) + int failed_image_tests = 0; + if (checkForImageSupport(deviceID) == 0) + { + failed_image_tests = run_image_tests(context, deviceID); + if (failed_image_tests == 0) + { + log_info("All Image Tests Passed\n"); + } + else + { + log_error("%d Image Test(s) Failed\n", failed_image_tests); + } + } + int failed_pipe_tests = 0; + // TODO https://github.com/KhronosGroup/OpenCL-CTS/issues/1244 + if (false) { - log_info("Testing optional half arguments...\n"); - error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0])); - test_failed = (error) ? -1 : test_failed; + failed_pipe_tests = run_pipe_tests(context, deviceID); + if (failed_pipe_tests == 0) + { + log_info("All Pipe Tests Passed\n"); + } + else + { + log_error("%d Pipe Test(s) Failed\n", failed_pipe_tests); + } } - if ( supports_long ) + int failed_boundary_tests = run_boundary_tests(context, deviceID); + if (failed_boundary_tests == 0) { - log_info("Testing optional long arguments...\n"); - error = test(deviceID, context, long_kernel_args, sizeof(long_kernel_args)/sizeof(long_kernel_args[0]), long_arg_info, sizeof(long_arg_info)/sizeof(long_arg_info[0])); - test_failed = (error) ? -1 : test_failed; + log_info("All Edge Case Tests Passed\n"); } - - if ( supports_3D_images ) + else { - log_info("Testing optional 3D image arguments...\n"); - error = test(deviceID, context, image_3D_kernel_args, sizeof(image_3D_kernel_args)/sizeof(image_3D_kernel_args[0]), image_3D_arg_info, sizeof(image_3D_arg_info)/sizeof(image_3D_arg_info[0])); - test_failed = (error) ? 
-1 : test_failed; + log_error("%d Edge Case Test(s) Failed\n", failed_boundary_tests); } - return test_failed; + return (failed_scalar_tests + failed_image_tests + failed_pipe_tests + + failed_boundary_tests); +} + +int test_get_kernel_arg_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int failed_tests = run_all_tests(context, deviceID); + if (failed_tests != 0) + { + log_error("%d Test(s) Failed\n", failed_tests); + return TEST_FAIL; + } + else + { + return TEST_PASS; + } } diff --git a/test_conformance/api/test_kernel_arg_info_compatibility.cpp b/test_conformance/api/test_kernel_arg_info_compatibility.cpp deleted file mode 100644 index a6b60c265e..0000000000 --- a/test_conformance/api/test_kernel_arg_info_compatibility.cpp +++ /dev/null @@ -1,5159 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "testBase.h" -#include -#include -#ifndef _WIN32 -#include -#endif - -#define ARG_INFO_FIELD_COUNT 5 - -#define ARG_INFO_ADDR_OFFSET 1 -#define ARG_INFO_ACCESS_OFFSET 2 -#define ARG_INFO_TYPE_QUAL_OFFSET 3 -#define ARG_INFO_TYPE_NAME_OFFSET 4 -#define ARG_INFO_ARG_NAME_OFFSET 5 - -typedef char const * kernel_args_t[]; - -static kernel_args_t required_kernel_args = { - "typedef float4 typedef_type;\n" - "\n" - "typedef struct struct_type {\n" - " float4 float4d;\n" - " int intd;\n" - "} typedef_struct_type;\n" - "\n" - "typedef union union_type {\n" - " float4 float4d;\n" - " uint4 uint4d;\n" - "} typedef_union_type;\n" - "\n" - "typedef enum enum_type {\n" - " enum_type_zero,\n" - " enum_type_one,\n" - " enum_type_two\n" - "} typedef_enum_type;\n" - "\n" - "kernel void constant_scalar_p0(constant void*constantvoidp,\n" - " constant char *constantcharp,\n" - " constant uchar* constantucharp,\n" - " constant unsigned char * constantunsignedcharp)\n" - "{}\n", - "kernel void constant_scalar_p1(constant short*constantshortp,\n" - " constant ushort *constantushortp,\n" - " constant unsigned short* constantunsignedshortp,\n" - " constant int * constantintp)\n" - "{}\n", - "kernel void constant_scalar_p2(constant uint*constantuintp,\n" - " constant unsigned int *constantunsignedintp,\n" - " constant long* constantlongp,\n" - " constant ulong * constantulongp)\n" - "{}\n", - "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp,\n" - " constant float *constantfloatp)\n" - "{}\n", - "\n" - "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n" - " constant char * restrict constantcharrestrictp,\n" - " constant uchar*restrict constantucharrestrictp,\n" - " constant unsigned char *restrict constantunsignedcharrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n" - " constant ushort * restrict constantushortrestrictp,\n" - " constant 
unsigned short*restrict constantunsignedshortrestrictp,\n" - " constant int *restrict constantintrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n" - " constant unsigned int * restrict constantunsignedintrestrictp,\n" - " constant long*restrict constantlongrestrictp,\n" - " constant ulong *restrict constantulongrestrictp)\n" - "{}\n", - "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp,\n" - " constant float * restrict constantfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_scalar_p(global void*globalvoidp,\n" - " global char *globalcharp,\n" - " global uchar* globalucharp,\n" - " global unsigned char * globalunsignedcharp,\n" - " global short*globalshortp,\n" - " global ushort *globalushortp,\n" - " global unsigned short* globalunsignedshortp,\n" - " global int * globalintp,\n" - " global uint*globaluintp,\n" - " global unsigned int *globalunsignedintp,\n" - " global long* globallongp,\n" - " global ulong * globalulongp,\n" - " global unsigned long*globalunsignedlongp,\n" - " global float *globalfloatp)\n" - "{}\n", - "\n" - "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n" - " global char * restrict globalcharrestrictp,\n" - " global uchar*restrict globalucharrestrictp,\n" - " global unsigned char *restrict globalunsignedcharrestrictp,\n" - " global short* restrict globalshortrestrictp,\n" - " global ushort * restrict globalushortrestrictp,\n" - " global unsigned short*restrict globalunsignedshortrestrictp,\n" - " global int *restrict globalintrestrictp,\n" - " global uint* restrict globaluintrestrictp,\n" - " global unsigned int * restrict globalunsignedintrestrictp,\n" - " global long*restrict globallongrestrictp,\n" - " global ulong *restrict globalulongrestrictp,\n" - " global unsigned long* restrict globalunsignedlongrestrictp,\n" - " global float * restrict globalfloatrestrictp)\n" - "{}\n", - 
"\n" - "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n" - " global const char *globalconstcharp,\n" - " global const uchar* globalconstucharp,\n" - " global const unsigned char * globalconstunsignedcharp,\n" - " global const short*globalconstshortp,\n" - " global const ushort *globalconstushortp,\n" - " global const unsigned short* globalconstunsignedshortp,\n" - " global const int * globalconstintp,\n" - " global const uint*globalconstuintp,\n" - " global const unsigned int *globalconstunsignedintp,\n" - " global const long* globalconstlongp,\n" - " global const ulong * globalconstulongp,\n" - " global const unsigned long*globalconstunsignedlongp,\n" - " global const float *globalconstfloatp)\n" - "{}\n", - "\n" - "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n" - " global const char * restrict globalconstcharrestrictp,\n" - " global const uchar*restrict globalconstucharrestrictp,\n" - " global const unsigned char *restrict globalconstunsignedcharrestrictp,\n" - " global const short* restrict globalconstshortrestrictp,\n" - " global const ushort * restrict globalconstushortrestrictp,\n" - " global const unsigned short*restrict globalconstunsignedshortrestrictp,\n" - " global const int *restrict globalconstintrestrictp,\n" - " global const uint* restrict globalconstuintrestrictp,\n" - " global const unsigned int * restrict globalconstunsignedintrestrictp,\n" - " global const long*restrict globalconstlongrestrictp,\n" - " global const ulong *restrict globalconstulongrestrictp,\n" - " global const unsigned long* restrict globalconstunsignedlongrestrictp,\n" - " global const float * restrict globalconstfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n" - " global volatile char *globalvolatilecharp,\n" - " global volatile uchar* globalvolatileucharp,\n" - " global volatile unsigned char * globalvolatileunsignedcharp,\n" - " 
global volatile short*globalvolatileshortp,\n" - " global volatile ushort *globalvolatileushortp,\n" - " global volatile unsigned short* globalvolatileunsignedshortp,\n" - " global volatile int * globalvolatileintp,\n" - " global volatile uint*globalvolatileuintp,\n" - " global volatile unsigned int *globalvolatileunsignedintp,\n" - " global volatile long* globalvolatilelongp,\n" - " global volatile ulong * globalvolatileulongp,\n" - " global volatile unsigned long*globalvolatileunsignedlongp,\n" - " global volatile float *globalvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n" - " global volatile char * restrict globalvolatilecharrestrictp,\n" - " global volatile uchar*restrict globalvolatileucharrestrictp,\n" - " global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n" - " global volatile short* restrict globalvolatileshortrestrictp,\n" - " global volatile ushort * restrict globalvolatileushortrestrictp,\n" - " global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n" - " global volatile int *restrict globalvolatileintrestrictp,\n" - " global volatile uint* restrict globalvolatileuintrestrictp,\n" - " global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n" - " global volatile long*restrict globalvolatilelongrestrictp,\n" - " global volatile ulong *restrict globalvolatileulongrestrictp,\n" - " global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp,\n" - " global volatile float * restrict globalvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n" - " global const volatile char *globalconstvolatilecharp,\n" - " global const volatile uchar* globalconstvolatileucharp,\n" - " global const volatile unsigned char * globalconstvolatileunsignedcharp,\n" - " global const volatile 
short*globalconstvolatileshortp,\n" - " global const volatile ushort *globalconstvolatileushortp,\n" - " global const volatile unsigned short* globalconstvolatileunsignedshortp,\n" - " global const volatile int * globalconstvolatileintp,\n" - " global const volatile uint*globalconstvolatileuintp,\n" - " global const volatile unsigned int *globalconstvolatileunsignedintp,\n" - " global const volatile long* globalconstvolatilelongp,\n" - " global const volatile ulong * globalconstvolatileulongp,\n" - " global const volatile unsigned long*globalconstvolatileunsignedlongp,\n" - " global const volatile float *globalconstvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n" - " global const volatile char * restrict globalconstvolatilecharrestrictp,\n" - " global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n" - " global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n" - " global const volatile short* restrict globalconstvolatileshortrestrictp,\n" - " global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n" - " global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n" - " global const volatile int *restrict globalconstvolatileintrestrictp,\n" - " global const volatile uint* restrict globalconstvolatileuintrestrictp,\n" - " global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n" - " global const volatile long*restrict globalconstvolatilelongrestrictp,\n" - " global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n" - " global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp,\n" - " global const volatile float * restrict globalconstvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_scalar_p(local void*localvoidp,\n" - " local char *localcharp,\n" - " local 
uchar* localucharp,\n" - " local unsigned char * localunsignedcharp,\n" - " local short*localshortp,\n" - " local ushort *localushortp,\n" - " local unsigned short* localunsignedshortp,\n" - " local int * localintp,\n" - " local uint*localuintp,\n" - " local unsigned int *localunsignedintp,\n" - " local long* locallongp,\n" - " local ulong * localulongp,\n" - " local unsigned long*localunsignedlongp,\n" - " local float *localfloatp)\n" - "{}\n", - "\n" - "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n" - " local char * restrict localcharrestrictp,\n" - " local uchar*restrict localucharrestrictp,\n" - " local unsigned char *restrict localunsignedcharrestrictp,\n" - " local short* restrict localshortrestrictp,\n" - " local ushort * restrict localushortrestrictp,\n" - " local unsigned short*restrict localunsignedshortrestrictp,\n" - " local int *restrict localintrestrictp,\n" - " local uint* restrict localuintrestrictp,\n" - " local unsigned int * restrict localunsignedintrestrictp,\n" - " local long*restrict locallongrestrictp,\n" - " local ulong *restrict localulongrestrictp,\n" - " local unsigned long* restrict localunsignedlongrestrictp,\n" - " local float * restrict localfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_scalar_p(local const void*localconstvoidp,\n" - " local const char *localconstcharp,\n" - " local const uchar* localconstucharp,\n" - " local const unsigned char * localconstunsignedcharp,\n" - " local const short*localconstshortp,\n" - " local const ushort *localconstushortp,\n" - " local const unsigned short* localconstunsignedshortp,\n" - " local const int * localconstintp,\n" - " local const uint*localconstuintp,\n" - " local const unsigned int *localconstunsignedintp,\n" - " local const long* localconstlongp,\n" - " local const ulong * localconstulongp,\n" - " local const unsigned long*localconstunsignedlongp,\n" - " local const float *localconstfloatp)\n" - "{}\n", - "\n" - "kernel void 
local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n" - " local const char * restrict localconstcharrestrictp,\n" - " local const uchar*restrict localconstucharrestrictp,\n" - " local const unsigned char *restrict localconstunsignedcharrestrictp,\n" - " local const short* restrict localconstshortrestrictp,\n" - " local const ushort * restrict localconstushortrestrictp,\n" - " local const unsigned short*restrict localconstunsignedshortrestrictp,\n" - " local const int *restrict localconstintrestrictp,\n" - " local const uint* restrict localconstuintrestrictp,\n" - " local const unsigned int * restrict localconstunsignedintrestrictp,\n" - " local const long*restrict localconstlongrestrictp,\n" - " local const ulong *restrict localconstulongrestrictp,\n" - " local const unsigned long* restrict localconstunsignedlongrestrictp,\n" - " local const float * restrict localconstfloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n" - " local volatile char *localvolatilecharp,\n" - " local volatile uchar* localvolatileucharp,\n" - " local volatile unsigned char * localvolatileunsignedcharp,\n" - " local volatile short*localvolatileshortp,\n" - " local volatile ushort *localvolatileushortp,\n" - " local volatile unsigned short* localvolatileunsignedshortp,\n" - " local volatile int * localvolatileintp,\n" - " local volatile uint*localvolatileuintp,\n" - " local volatile unsigned int *localvolatileunsignedintp,\n" - " local volatile long* localvolatilelongp,\n" - " local volatile ulong * localvolatileulongp,\n" - " local volatile unsigned long*localvolatileunsignedlongp,\n" - " local volatile float *localvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n" - " local volatile char * restrict localvolatilecharrestrictp,\n" - " local volatile uchar*restrict localvolatileucharrestrictp,\n" - " local 
volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n" - " local volatile short* restrict localvolatileshortrestrictp,\n" - " local volatile ushort * restrict localvolatileushortrestrictp,\n" - " local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n" - " local volatile int *restrict localvolatileintrestrictp,\n" - " local volatile uint* restrict localvolatileuintrestrictp,\n" - " local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n" - " local volatile long*restrict localvolatilelongrestrictp,\n" - " local volatile ulong *restrict localvolatileulongrestrictp,\n" - " local volatile unsigned long* restrict localvolatileunsignedlongrestrictp,\n" - " local volatile float * restrict localvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n" - " local const volatile char *localconstvolatilecharp,\n" - " local const volatile uchar* localconstvolatileucharp,\n" - " local const volatile unsigned char * localconstvolatileunsignedcharp,\n" - " local const volatile short*localconstvolatileshortp,\n" - " local const volatile ushort *localconstvolatileushortp,\n" - " local const volatile unsigned short* localconstvolatileunsignedshortp,\n" - " local const volatile int * localconstvolatileintp,\n" - " local const volatile uint*localconstvolatileuintp,\n" - " local const volatile unsigned int *localconstvolatileunsignedintp,\n" - " local const volatile long* localconstvolatilelongp,\n" - " local const volatile ulong * localconstvolatileulongp,\n" - " local const volatile unsigned long*localconstvolatileunsignedlongp,\n" - " local const volatile float *localconstvolatilefloatp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n" - " local const volatile char * restrict localconstvolatilecharrestrictp,\n" - " local const volatile 
uchar*restrict localconstvolatileucharrestrictp,\n" - " local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n" - " local const volatile short* restrict localconstvolatileshortrestrictp,\n" - " local const volatile ushort * restrict localconstvolatileushortrestrictp,\n" - " local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n" - " local const volatile int *restrict localconstvolatileintrestrictp,\n" - " local const volatile uint* restrict localconstvolatileuintrestrictp,\n" - " local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n" - " local const volatile long*restrict localconstvolatilelongrestrictp,\n" - " local const volatile ulong *restrict localconstvolatileulongrestrictp,\n" - " local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp,\n" - " local const volatile float * restrict localconstvolatilefloatrestrictp)\n" - "{}\n", - "\n" - "kernel void scalar_d(char chard,\n" - " uchar uchard,\n" - " unsigned char unsignedchard,\n" - " short shortd,\n" - " ushort ushortd,\n" - " unsigned short unsignedshortd,\n" - " int intd,\n" - " uint uintd,\n" - " unsigned int unsignedintd,\n" - " long longd,\n" - " ulong ulongd,\n" - " unsigned long unsignedlongd,\n" - " float floatd)\n" - "{}\n", - "\n" - "kernel void const_scalar_d(const char constchard,\n" - " const uchar constuchard,\n" - " const unsigned char constunsignedchard,\n" - " const short constshortd,\n" - " const ushort constushortd,\n" - " const unsigned short constunsignedshortd,\n" - " const int constintd,\n" - " const uint constuintd,\n" - " const unsigned int constunsignedintd,\n" - " const long constlongd,\n" - " const ulong constulongd,\n" - " const unsigned long constunsignedlongd,\n" - " const float constfloatd)\n" - "{}\n", - "\n" - "kernel void private_scalar_d(private char privatechard,\n" - " private uchar privateuchard,\n" - " private unsigned char 
privateunsignedchard,\n" - " private short privateshortd,\n" - " private ushort privateushortd,\n" - " private unsigned short privateunsignedshortd,\n" - " private int privateintd,\n" - " private uint privateuintd,\n" - " private unsigned int privateunsignedintd,\n" - " private long privatelongd,\n" - " private ulong privateulongd,\n" - " private unsigned long privateunsignedlongd,\n" - " private float privatefloatd)\n" - "{}\n", - "\n" - "kernel void private_const_scalar_d(private const char privateconstchard,\n" - " private const uchar privateconstuchard,\n" - " private const unsigned char privateconstunsignedchard,\n" - " private const short privateconstshortd,\n" - " private const ushort privateconstushortd,\n" - " private const unsigned short privateconstunsignedshortd,\n" - " private const int privateconstintd,\n" - " private const uint privateconstuintd,\n" - " private const unsigned int privateconstunsignedintd,\n" - " private const long privateconstlongd,\n" - " private const ulong privateconstulongd,\n" - " private const unsigned long privateconstunsignedlongd,\n" - " private const float privateconstfloatd)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p0(constant char2*constantchar2p,\n" - " constant uchar2 *constantuchar2p,\n" - " constant short2* constantshort2p,\n" - " constant ushort2 * constantushort2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p1(constant int2*constantint2p,\n" - " constant uint2 *constantuint2p,\n" - " constant long2* constantlong2p,\n" - " constant ulong2 * constantulong2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n" - " constant uchar2* restrict constantuchar2restrictp,\n" - " constant short2 * restrict constantshort2restrictp,\n" - " constant ushort2*restrict constantushort2restrictp)\n" - "{}\n", - "\n" - "kernel void 
constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n" - " constant uint2* restrict constantuint2restrictp,\n" - " constant long2 * restrict constantlong2restrictp,\n" - " constant ulong2*restrict constantulong2restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector2_p(global char2*globalchar2p,\n" - " global uchar2 *globaluchar2p,\n" - " global short2* globalshort2p,\n" - " global ushort2 * globalushort2p,\n" - " global int2*globalint2p,\n" - " global uint2 *globaluint2p,\n" - " global long2* globallong2p,\n" - " global ulong2 * globalulong2p,\n" - " global float2*globalfloat2p)\n" - "{}\n", - "\n" - "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n" - " global uchar2* restrict globaluchar2restrictp,\n" - " global short2 * restrict globalshort2restrictp,\n" - " global ushort2*restrict globalushort2restrictp,\n" - " global int2 *restrict globalint2restrictp,\n" - " global uint2* restrict globaluint2restrictp,\n" - " global long2 * restrict globallong2restrictp,\n" - " global ulong2*restrict globalulong2restrictp,\n" - " global float2 *restrict globalfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n" - " global const uchar2 * globalconstuchar2p,\n" - " global const short2*globalconstshort2p,\n" - " global const ushort2 *globalconstushort2p,\n" - " global const int2* globalconstint2p,\n" - " global const uint2 * globalconstuint2p,\n" - " global const long2*globalconstlong2p,\n" - " global const ulong2 *globalconstulong2p,\n" - " global const float2* globalconstfloat2p)\n" - "{}\n", - "\n" - "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n" - " global const uchar2*restrict globalconstuchar2restrictp,\n" - " global const short2 *restrict globalconstshort2restrictp,\n" - " 
global const ushort2* restrict globalconstushort2restrictp,\n" - " global const int2 * restrict globalconstint2restrictp,\n" - " global const uint2*restrict globalconstuint2restrictp,\n" - " global const long2 *restrict globalconstlong2restrictp,\n" - " global const ulong2* restrict globalconstulong2restrictp,\n" - " global const float2 * restrict globalconstfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n" - " global volatile uchar2 *globalvolatileuchar2p,\n" - " global volatile short2* globalvolatileshort2p,\n" - " global volatile ushort2 * globalvolatileushort2p,\n" - " global volatile int2*globalvolatileint2p,\n" - " global volatile uint2 *globalvolatileuint2p,\n" - " global volatile long2* globalvolatilelong2p,\n" - " global volatile ulong2 * globalvolatileulong2p,\n" - " global volatile float2*globalvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n" - " global volatile uchar2* restrict globalvolatileuchar2restrictp,\n" - " global volatile short2 * restrict globalvolatileshort2restrictp,\n" - " global volatile ushort2*restrict globalvolatileushort2restrictp,\n" - " global volatile int2 *restrict globalvolatileint2restrictp,\n" - " global volatile uint2* restrict globalvolatileuint2restrictp,\n" - " global volatile long2 * restrict globalvolatilelong2restrictp,\n" - " global volatile ulong2*restrict globalvolatileulong2restrictp,\n" - " global volatile float2 *restrict globalvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n" - " global const volatile uchar2 * globalconstvolatileuchar2p,\n" - " global const volatile short2*globalconstvolatileshort2p,\n" - " global const volatile ushort2 *globalconstvolatileushort2p,\n" - " global const volatile int2* globalconstvolatileint2p,\n" - " global 
const volatile uint2 * globalconstvolatileuint2p,\n" - " global const volatile long2*globalconstvolatilelong2p,\n" - " global const volatile ulong2 *globalconstvolatileulong2p,\n" - " global const volatile float2* globalconstvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n" - " global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n" - " global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n" - " global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n" - " global const volatile int2 * restrict globalconstvolatileint2restrictp,\n" - " global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n" - " global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n" - " global const volatile ulong2* restrict globalconstvolatileulong2restrictp,\n" - " global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector2_p(local char2*localchar2p,\n" - " local uchar2 *localuchar2p,\n" - " local short2* localshort2p,\n" - " local ushort2 * localushort2p,\n" - " local int2*localint2p,\n" - " local uint2 *localuint2p,\n" - " local long2* locallong2p,\n" - " local ulong2 * localulong2p,\n" - " local float2*localfloat2p)\n" - "{}\n", - "\n" - "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n" - " local uchar2* restrict localuchar2restrictp,\n" - " local short2 * restrict localshort2restrictp,\n" - " local ushort2*restrict localushort2restrictp,\n" - " local int2 *restrict localint2restrictp,\n" - " local uint2* restrict localuint2restrictp,\n" - " local long2 * restrict locallong2restrictp,\n" - " local ulong2*restrict localulong2restrictp,\n" - " local float2 *restrict localfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector2_p(local const char2* 
localconstchar2p,\n" - " local const uchar2 * localconstuchar2p,\n" - " local const short2*localconstshort2p,\n" - " local const ushort2 *localconstushort2p,\n" - " local const int2* localconstint2p,\n" - " local const uint2 * localconstuint2p,\n" - " local const long2*localconstlong2p,\n" - " local const ulong2 *localconstulong2p,\n" - " local const float2* localconstfloat2p)\n" - "{}\n", - "\n" - "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n" - " local const uchar2*restrict localconstuchar2restrictp,\n" - " local const short2 *restrict localconstshort2restrictp,\n" - " local const ushort2* restrict localconstushort2restrictp,\n" - " local const int2 * restrict localconstint2restrictp,\n" - " local const uint2*restrict localconstuint2restrictp,\n" - " local const long2 *restrict localconstlong2restrictp,\n" - " local const ulong2* restrict localconstulong2restrictp,\n" - " local const float2 * restrict localconstfloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n" - " local volatile uchar2 *localvolatileuchar2p,\n" - " local volatile short2* localvolatileshort2p,\n" - " local volatile ushort2 * localvolatileushort2p,\n" - " local volatile int2*localvolatileint2p,\n" - " local volatile uint2 *localvolatileuint2p,\n" - " local volatile long2* localvolatilelong2p,\n" - " local volatile ulong2 * localvolatileulong2p,\n" - " local volatile float2*localvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n" - " local volatile uchar2* restrict localvolatileuchar2restrictp,\n" - " local volatile short2 * restrict localvolatileshort2restrictp,\n" - " local volatile ushort2*restrict localvolatileushort2restrictp,\n" - " local volatile int2 *restrict localvolatileint2restrictp,\n" - " local volatile uint2* restrict localvolatileuint2restrictp,\n" - " local 
volatile long2 * restrict localvolatilelong2restrictp,\n" - " local volatile ulong2*restrict localvolatileulong2restrictp,\n" - " local volatile float2 *restrict localvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n" - " local const volatile uchar2 * localconstvolatileuchar2p,\n" - " local const volatile short2*localconstvolatileshort2p,\n" - " local const volatile ushort2 *localconstvolatileushort2p,\n" - " local const volatile int2* localconstvolatileint2p,\n" - " local const volatile uint2 * localconstvolatileuint2p,\n" - " local const volatile long2*localconstvolatilelong2p,\n" - " local const volatile ulong2 *localconstvolatileulong2p,\n" - " local const volatile float2* localconstvolatilefloat2p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n" - " local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n" - " local const volatile short2 *restrict localconstvolatileshort2restrictp,\n" - " local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n" - " local const volatile int2 * restrict localconstvolatileint2restrictp,\n" - " local const volatile uint2*restrict localconstvolatileuint2restrictp,\n" - " local const volatile long2 *restrict localconstvolatilelong2restrictp,\n" - " local const volatile ulong2* restrict localconstvolatileulong2restrictp,\n" - " local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n" - "{}\n", - "\n" - "kernel void vector2_d(char2 char2d,\n" - " uchar2 uchar2d,\n" - " short2 short2d,\n" - " ushort2 ushort2d,\n" - " int2 int2d,\n" - " uint2 uint2d,\n" - " long2 long2d,\n" - " ulong2 ulong2d,\n" - " float2 float2d)\n" - "{}\n", - "\n" - "kernel void const_vector2_d(const char2 constchar2d,\n" - " const uchar2 constuchar2d,\n" - " const short2 constshort2d,\n" - " const ushort2 
constushort2d,\n" - " const int2 constint2d,\n" - " const uint2 constuint2d,\n" - " const long2 constlong2d,\n" - " const ulong2 constulong2d,\n" - " const float2 constfloat2d)\n" - "{}\n", - "\n" - "kernel void private_vector2_d(private char2 privatechar2d,\n" - " private uchar2 privateuchar2d,\n" - " private short2 privateshort2d,\n" - " private ushort2 privateushort2d,\n" - " private int2 privateint2d,\n" - " private uint2 privateuint2d,\n" - " private long2 privatelong2d,\n" - " private ulong2 privateulong2d,\n" - " private float2 privatefloat2d)\n" - "{}\n", - "\n" - "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n" - " private const uchar2 privateconstuchar2d,\n" - " private const short2 privateconstshort2d,\n" - " private const ushort2 privateconstushort2d,\n" - " private const int2 privateconstint2d,\n" - " private const uint2 privateconstuint2d,\n" - " private const long2 privateconstlong2d,\n" - " private const ulong2 privateconstulong2d,\n" - " private const float2 privateconstfloat2d)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p0(constant char3*constantchar3p,\n" - " constant uchar3 *constantuchar3p,\n" - " constant short3* constantshort3p,\n" - " constant ushort3 * constantushort3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p1(constant int3*constantint3p,\n" - " constant uint3 *constantuint3p,\n" - " constant long3* constantlong3p,\n" - " constant ulong3 * constantulong3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n" - " constant uchar3* restrict constantuchar3restrictp,\n" - " constant short3 * restrict constantshort3restrictp,\n" - " constant ushort3*restrict constantushort3restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n" - " constant uint3* restrict 
constantuint3restrictp,\n" - " constant long3 * restrict constantlong3restrictp,\n" - " constant ulong3*restrict constantulong3restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector3_p(global char3*globalchar3p,\n" - " global uchar3 *globaluchar3p,\n" - " global short3* globalshort3p,\n" - " global ushort3 * globalushort3p,\n" - " global int3*globalint3p,\n" - " global uint3 *globaluint3p,\n" - " global long3* globallong3p,\n" - " global ulong3 * globalulong3p,\n" - " global float3*globalfloat3p)\n" - "{}\n", - "\n" - "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n" - " global uchar3* restrict globaluchar3restrictp,\n" - " global short3 * restrict globalshort3restrictp,\n" - " global ushort3*restrict globalushort3restrictp,\n" - " global int3 *restrict globalint3restrictp,\n" - " global uint3* restrict globaluint3restrictp,\n" - " global long3 * restrict globallong3restrictp,\n" - " global ulong3*restrict globalulong3restrictp,\n" - " global float3 *restrict globalfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n" - " global const uchar3 * globalconstuchar3p,\n" - " global const short3*globalconstshort3p,\n" - " global const ushort3 *globalconstushort3p,\n" - " global const int3* globalconstint3p,\n" - " global const uint3 * globalconstuint3p,\n" - " global const long3*globalconstlong3p,\n" - " global const ulong3 *globalconstulong3p,\n" - " global const float3* globalconstfloat3p)\n" - "{}\n", - "\n" - "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n" - " global const uchar3*restrict globalconstuchar3restrictp,\n" - " global const short3 *restrict globalconstshort3restrictp,\n" - " global const ushort3* restrict globalconstushort3restrictp,\n" - " global const int3 * restrict 
globalconstint3restrictp,\n" - " global const uint3*restrict globalconstuint3restrictp,\n" - " global const long3 *restrict globalconstlong3restrictp,\n" - " global const ulong3* restrict globalconstulong3restrictp,\n" - " global const float3 * restrict globalconstfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n" - " global volatile uchar3 *globalvolatileuchar3p,\n" - " global volatile short3* globalvolatileshort3p,\n" - " global volatile ushort3 * globalvolatileushort3p,\n" - " global volatile int3*globalvolatileint3p,\n" - " global volatile uint3 *globalvolatileuint3p,\n" - " global volatile long3* globalvolatilelong3p,\n" - " global volatile ulong3 * globalvolatileulong3p,\n" - " global volatile float3*globalvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n" - " global volatile uchar3* restrict globalvolatileuchar3restrictp,\n" - " global volatile short3 * restrict globalvolatileshort3restrictp,\n" - " global volatile ushort3*restrict globalvolatileushort3restrictp,\n" - " global volatile int3 *restrict globalvolatileint3restrictp,\n" - " global volatile uint3* restrict globalvolatileuint3restrictp,\n" - " global volatile long3 * restrict globalvolatilelong3restrictp,\n" - " global volatile ulong3*restrict globalvolatileulong3restrictp,\n" - " global volatile float3 *restrict globalvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n" - " global const volatile uchar3 * globalconstvolatileuchar3p,\n" - " global const volatile short3*globalconstvolatileshort3p,\n" - " global const volatile ushort3 *globalconstvolatileushort3p,\n" - " global const volatile int3* globalconstvolatileint3p,\n" - " global const volatile uint3 * globalconstvolatileuint3p,\n" - " global const volatile 
long3*globalconstvolatilelong3p,\n" - " global const volatile ulong3 *globalconstvolatileulong3p,\n" - " global const volatile float3* globalconstvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n" - " global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n" - " global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n" - " global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n" - " global const volatile int3 * restrict globalconstvolatileint3restrictp,\n" - " global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n" - " global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n" - " global const volatile ulong3* restrict globalconstvolatileulong3restrictp,\n" - " global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector3_p(local char3*localchar3p,\n" - " local uchar3 *localuchar3p,\n" - " local short3* localshort3p,\n" - " local ushort3 * localushort3p,\n" - " local int3*localint3p,\n" - " local uint3 *localuint3p,\n" - " local long3* locallong3p,\n" - " local ulong3 * localulong3p,\n" - " local float3*localfloat3p)\n" - "{}\n", - "\n" - "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n" - " local uchar3* restrict localuchar3restrictp,\n" - " local short3 * restrict localshort3restrictp,\n" - " local ushort3*restrict localushort3restrictp,\n" - " local int3 *restrict localint3restrictp,\n" - " local uint3* restrict localuint3restrictp,\n" - " local long3 * restrict locallong3restrictp,\n" - " local ulong3*restrict localulong3restrictp,\n" - " local float3 *restrict localfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n" - " local const uchar3 * localconstuchar3p,\n" - " local const 
short3*localconstshort3p,\n" - " local const ushort3 *localconstushort3p,\n" - " local const int3* localconstint3p,\n" - " local const uint3 * localconstuint3p,\n" - " local const long3*localconstlong3p,\n" - " local const ulong3 *localconstulong3p,\n" - " local const float3* localconstfloat3p)\n" - "{}\n", - "\n" - "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n" - " local const uchar3*restrict localconstuchar3restrictp,\n" - " local const short3 *restrict localconstshort3restrictp,\n" - " local const ushort3* restrict localconstushort3restrictp,\n" - " local const int3 * restrict localconstint3restrictp,\n" - " local const uint3*restrict localconstuint3restrictp,\n" - " local const long3 *restrict localconstlong3restrictp,\n" - " local const ulong3* restrict localconstulong3restrictp,\n" - " local const float3 * restrict localconstfloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n" - " local volatile uchar3 *localvolatileuchar3p,\n" - " local volatile short3* localvolatileshort3p,\n" - " local volatile ushort3 * localvolatileushort3p,\n" - " local volatile int3*localvolatileint3p,\n" - " local volatile uint3 *localvolatileuint3p,\n" - " local volatile long3* localvolatilelong3p,\n" - " local volatile ulong3 * localvolatileulong3p,\n" - " local volatile float3*localvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n" - " local volatile uchar3* restrict localvolatileuchar3restrictp,\n" - " local volatile short3 * restrict localvolatileshort3restrictp,\n" - " local volatile ushort3*restrict localvolatileushort3restrictp,\n" - " local volatile int3 *restrict localvolatileint3restrictp,\n" - " local volatile uint3* restrict localvolatileuint3restrictp,\n" - " local volatile long3 * restrict localvolatilelong3restrictp,\n" - " local volatile 
ulong3*restrict localvolatileulong3restrictp,\n" - " local volatile float3 *restrict localvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n" - " local const volatile uchar3 * localconstvolatileuchar3p,\n" - " local const volatile short3*localconstvolatileshort3p,\n" - " local const volatile ushort3 *localconstvolatileushort3p,\n" - " local const volatile int3* localconstvolatileint3p,\n" - " local const volatile uint3 * localconstvolatileuint3p,\n" - " local const volatile long3*localconstvolatilelong3p,\n" - " local const volatile ulong3 *localconstvolatileulong3p,\n" - " local const volatile float3* localconstvolatilefloat3p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n" - " local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n" - " local const volatile short3 *restrict localconstvolatileshort3restrictp,\n" - " local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n" - " local const volatile int3 * restrict localconstvolatileint3restrictp,\n" - " local const volatile uint3*restrict localconstvolatileuint3restrictp,\n" - " local const volatile long3 *restrict localconstvolatilelong3restrictp,\n" - " local const volatile ulong3* restrict localconstvolatileulong3restrictp,\n" - " local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n" - "{}\n", - "\n" - "kernel void vector3_d(char3 char3d,\n" - " uchar3 uchar3d,\n" - " short3 short3d,\n" - " ushort3 ushort3d,\n" - " int3 int3d,\n" - " uint3 uint3d,\n" - " long3 long3d,\n" - " ulong3 ulong3d,\n" - " float3 float3d)\n" - "{}\n", - "\n" - "kernel void const_vector3_d(const char3 constchar3d,\n" - " const uchar3 constuchar3d,\n" - " const short3 constshort3d,\n" - " const ushort3 constushort3d,\n" - " const int3 constint3d,\n" - " const uint3 constuint3d,\n" - " 
const long3 constlong3d,\n" - " const ulong3 constulong3d,\n" - " const float3 constfloat3d)\n" - "{}\n", - "\n" - "kernel void private_vector3_d(private char3 privatechar3d,\n" - " private uchar3 privateuchar3d,\n" - " private short3 privateshort3d,\n" - " private ushort3 privateushort3d,\n" - " private int3 privateint3d,\n" - " private uint3 privateuint3d,\n" - " private long3 privatelong3d,\n" - " private ulong3 privateulong3d,\n" - " private float3 privatefloat3d)\n" - "{}\n", - "\n" - "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n" - " private const uchar3 privateconstuchar3d,\n" - " private const short3 privateconstshort3d,\n" - " private const ushort3 privateconstushort3d,\n" - " private const int3 privateconstint3d,\n" - " private const uint3 privateconstuint3d,\n" - " private const long3 privateconstlong3d,\n" - " private const ulong3 privateconstulong3d,\n" - " private const float3 privateconstfloat3d)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p0(constant char4*constantchar4p,\n" - " constant uchar4 *constantuchar4p,\n" - " constant short4* constantshort4p,\n" - " constant ushort4 * constantushort4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p1(constant int4*constantint4p,\n" - " constant uint4 *constantuint4p,\n" - " constant long4* constantlong4p,\n" - " constant ulong4 * constantulong4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n" - " constant uchar4* restrict constantuchar4restrictp,\n" - " constant short4 * restrict constantshort4restrictp,\n" - " constant ushort4*restrict constantushort4restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n" - " constant uint4* restrict constantuint4restrictp,\n" - " constant long4 * restrict constantlong4restrictp,\n" - " constant 
ulong4*restrict constantulong4restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector4_p(global char4*globalchar4p,\n" - " global uchar4 *globaluchar4p,\n" - " global short4* globalshort4p,\n" - " global ushort4 * globalushort4p,\n" - " global int4*globalint4p,\n" - " global uint4 *globaluint4p,\n" - " global long4* globallong4p,\n" - " global ulong4 * globalulong4p,\n" - " global float4*globalfloat4p)\n" - "{}\n", - "\n" - "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n" - " global uchar4* restrict globaluchar4restrictp,\n" - " global short4 * restrict globalshort4restrictp,\n" - " global ushort4*restrict globalushort4restrictp,\n" - " global int4 *restrict globalint4restrictp,\n" - " global uint4* restrict globaluint4restrictp,\n" - " global long4 * restrict globallong4restrictp,\n" - " global ulong4*restrict globalulong4restrictp,\n" - " global float4 *restrict globalfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n" - " global const uchar4 * globalconstuchar4p,\n" - " global const short4*globalconstshort4p,\n" - " global const ushort4 *globalconstushort4p,\n" - " global const int4* globalconstint4p,\n" - " global const uint4 * globalconstuint4p,\n" - " global const long4*globalconstlong4p,\n" - " global const ulong4 *globalconstulong4p,\n" - " global const float4* globalconstfloat4p)\n" - "{}\n", - "\n" - "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n" - " global const uchar4*restrict globalconstuchar4restrictp,\n" - " global const short4 *restrict globalconstshort4restrictp,\n" - " global const ushort4* restrict globalconstushort4restrictp,\n" - " global const int4 * restrict globalconstint4restrictp,\n" - " global const uint4*restrict globalconstuint4restrictp,\n" - " global const 
long4 *restrict globalconstlong4restrictp,\n" - " global const ulong4* restrict globalconstulong4restrictp,\n" - " global const float4 * restrict globalconstfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n" - " global volatile uchar4 *globalvolatileuchar4p,\n" - " global volatile short4* globalvolatileshort4p,\n" - " global volatile ushort4 * globalvolatileushort4p,\n" - " global volatile int4*globalvolatileint4p,\n" - " global volatile uint4 *globalvolatileuint4p,\n" - " global volatile long4* globalvolatilelong4p,\n" - " global volatile ulong4 * globalvolatileulong4p,\n" - " global volatile float4*globalvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n" - " global volatile uchar4* restrict globalvolatileuchar4restrictp,\n" - " global volatile short4 * restrict globalvolatileshort4restrictp,\n" - " global volatile ushort4*restrict globalvolatileushort4restrictp,\n" - " global volatile int4 *restrict globalvolatileint4restrictp,\n" - " global volatile uint4* restrict globalvolatileuint4restrictp,\n" - " global volatile long4 * restrict globalvolatilelong4restrictp,\n" - " global volatile ulong4*restrict globalvolatileulong4restrictp,\n" - " global volatile float4 *restrict globalvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n" - " global const volatile uchar4 * globalconstvolatileuchar4p,\n" - " global const volatile short4*globalconstvolatileshort4p,\n" - " global const volatile ushort4 *globalconstvolatileushort4p,\n" - " global const volatile int4* globalconstvolatileint4p,\n" - " global const volatile uint4 * globalconstvolatileuint4p,\n" - " global const volatile long4*globalconstvolatilelong4p,\n" - " global const volatile ulong4 *globalconstvolatileulong4p,\n" - " global const volatile 
float4* globalconstvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n" - " global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n" - " global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n" - " global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n" - " global const volatile int4 * restrict globalconstvolatileint4restrictp,\n" - " global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n" - " global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n" - " global const volatile ulong4* restrict globalconstvolatileulong4restrictp,\n" - " global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector4_p(local char4*localchar4p,\n" - " local uchar4 *localuchar4p,\n" - " local short4* localshort4p,\n" - " local ushort4 * localushort4p,\n" - " local int4*localint4p,\n" - " local uint4 *localuint4p,\n" - " local long4* locallong4p,\n" - " local ulong4 * localulong4p,\n" - " local float4*localfloat4p)\n" - "{}\n", - "\n" - "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n" - " local uchar4* restrict localuchar4restrictp,\n" - " local short4 * restrict localshort4restrictp,\n" - " local ushort4*restrict localushort4restrictp,\n" - " local int4 *restrict localint4restrictp,\n" - " local uint4* restrict localuint4restrictp,\n" - " local long4 * restrict locallong4restrictp,\n" - " local ulong4*restrict localulong4restrictp,\n" - " local float4 *restrict localfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n" - " local const uchar4 * localconstuchar4p,\n" - " local const short4*localconstshort4p,\n" - " local const ushort4 *localconstushort4p,\n" - " local const int4* localconstint4p,\n" - " local const uint4 
* localconstuint4p,\n" - " local const long4*localconstlong4p,\n" - " local const ulong4 *localconstulong4p,\n" - " local const float4* localconstfloat4p)\n" - "{}\n", - "\n" - "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n" - " local const uchar4*restrict localconstuchar4restrictp,\n" - " local const short4 *restrict localconstshort4restrictp,\n" - " local const ushort4* restrict localconstushort4restrictp,\n" - " local const int4 * restrict localconstint4restrictp,\n" - " local const uint4*restrict localconstuint4restrictp,\n" - " local const long4 *restrict localconstlong4restrictp,\n" - " local const ulong4* restrict localconstulong4restrictp,\n" - " local const float4 * restrict localconstfloat4restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n" - " local volatile uchar4 *localvolatileuchar4p,\n" - " local volatile short4* localvolatileshort4p,\n" - " local volatile ushort4 * localvolatileushort4p,\n" - " local volatile int4*localvolatileint4p,\n" - " local volatile uint4 *localvolatileuint4p,\n" - " local volatile long4* localvolatilelong4p,\n" - " local volatile ulong4 * localvolatileulong4p,\n" - " local volatile float4*localvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n" - " local volatile uchar4* restrict localvolatileuchar4restrictp,\n" - " local volatile short4 * restrict localvolatileshort4restrictp,\n" - " local volatile ushort4*restrict localvolatileushort4restrictp,\n" - " local volatile int4 *restrict localvolatileint4restrictp,\n" - " local volatile uint4* restrict localvolatileuint4restrictp,\n" - " local volatile long4 * restrict localvolatilelong4restrictp,\n" - " local volatile ulong4*restrict localvolatileulong4restrictp,\n" - " local volatile float4 *restrict localvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void 
local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n" - " local const volatile uchar4 * localconstvolatileuchar4p,\n" - " local const volatile short4*localconstvolatileshort4p,\n" - " local const volatile ushort4 *localconstvolatileushort4p,\n" - " local const volatile int4* localconstvolatileint4p,\n" - " local const volatile uint4 * localconstvolatileuint4p,\n" - " local const volatile long4*localconstvolatilelong4p,\n" - " local const volatile ulong4 *localconstvolatileulong4p,\n" - " local const volatile float4* localconstvolatilefloat4p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n" - " local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n" - " local const volatile short4 *restrict localconstvolatileshort4restrictp,\n" - " local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n" - " local const volatile int4 * restrict localconstvolatileint4restrictp,\n" - " local const volatile uint4*restrict localconstvolatileuint4restrictp,\n" - " local const volatile long4 *restrict localconstvolatilelong4restrictp,\n" - " local const volatile ulong4* restrict localconstvolatileulong4restrictp,\n" - " local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n" - "{}\n", - "\n" - "kernel void vector4_d(char4 char4d,\n" - " uchar4 uchar4d,\n" - " short4 short4d,\n" - " ushort4 ushort4d,\n" - " int4 int4d,\n" - " uint4 uint4d,\n" - " long4 long4d,\n" - " ulong4 ulong4d,\n" - " float4 float4d)\n" - "{}\n", - "\n" - "kernel void const_vector4_d(const char4 constchar4d,\n" - " const uchar4 constuchar4d,\n" - " const short4 constshort4d,\n" - " const ushort4 constushort4d,\n" - " const int4 constint4d,\n" - " const uint4 constuint4d,\n" - " const long4 constlong4d,\n" - " const ulong4 constulong4d,\n" - " const float4 constfloat4d)\n" - "{}\n", - "\n" - "kernel void 
private_vector4_d(private char4 privatechar4d,\n" - " private uchar4 privateuchar4d,\n" - " private short4 privateshort4d,\n" - " private ushort4 privateushort4d,\n" - " private int4 privateint4d,\n" - " private uint4 privateuint4d,\n" - " private long4 privatelong4d,\n" - " private ulong4 privateulong4d,\n" - " private float4 privatefloat4d)\n" - "{}\n", - "\n" - "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n" - " private const uchar4 privateconstuchar4d,\n" - " private const short4 privateconstshort4d,\n" - " private const ushort4 privateconstushort4d,\n" - " private const int4 privateconstint4d,\n" - " private const uint4 privateconstuint4d,\n" - " private const long4 privateconstlong4d,\n" - " private const ulong4 privateconstulong4d,\n" - " private const float4 privateconstfloat4d)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p0(constant char8*constantchar8p,\n" - " constant uchar8 *constantuchar8p,\n" - " constant short8* constantshort8p,\n" - " constant ushort8 * constantushort8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p1(constant int8*constantint8p,\n" - " constant uint8 *constantuint8p,\n" - " constant long8* constantlong8p,\n" - " constant ulong8 * constantulong8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n" - " constant uchar8* restrict constantuchar8restrictp,\n" - " constant short8 * restrict constantshort8restrictp,\n" - " constant ushort8*restrict constantushort8restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n" - " constant uint8* restrict constantuint8restrictp,\n" - " constant long8 * restrict constantlong8restrictp,\n" - " constant ulong8*restrict constantulong8restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector8_restrict_p2(constant float8 *restrict 
constantfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector8_p(global char8*globalchar8p,\n" - " global uchar8 *globaluchar8p,\n" - " global short8* globalshort8p,\n" - " global ushort8 * globalushort8p,\n" - " global int8*globalint8p,\n" - " global uint8 *globaluint8p,\n" - " global long8* globallong8p,\n" - " global ulong8 * globalulong8p,\n" - " global float8*globalfloat8p)\n" - "{}\n", - "\n" - "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n" - " global uchar8* restrict globaluchar8restrictp,\n" - " global short8 * restrict globalshort8restrictp,\n" - " global ushort8*restrict globalushort8restrictp,\n" - " global int8 *restrict globalint8restrictp,\n" - " global uint8* restrict globaluint8restrictp,\n" - " global long8 * restrict globallong8restrictp,\n" - " global ulong8*restrict globalulong8restrictp,\n" - " global float8 *restrict globalfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n" - " global const uchar8 * globalconstuchar8p,\n" - " global const short8*globalconstshort8p,\n" - " global const ushort8 *globalconstushort8p,\n" - " global const int8* globalconstint8p,\n" - " global const uint8 * globalconstuint8p,\n" - " global const long8*globalconstlong8p,\n" - " global const ulong8 *globalconstulong8p,\n" - " global const float8* globalconstfloat8p)\n" - "{}\n", - "\n" - "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n" - " global const uchar8*restrict globalconstuchar8restrictp,\n" - " global const short8 *restrict globalconstshort8restrictp,\n" - " global const ushort8* restrict globalconstushort8restrictp,\n" - " global const int8 * restrict globalconstint8restrictp,\n" - " global const uint8*restrict globalconstuint8restrictp,\n" - " global const long8 *restrict globalconstlong8restrictp,\n" - " global const ulong8* restrict globalconstulong8restrictp,\n" - " global const float8 
* restrict globalconstfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n" - " global volatile uchar8 *globalvolatileuchar8p,\n" - " global volatile short8* globalvolatileshort8p,\n" - " global volatile ushort8 * globalvolatileushort8p,\n" - " global volatile int8*globalvolatileint8p,\n" - " global volatile uint8 *globalvolatileuint8p,\n" - " global volatile long8* globalvolatilelong8p,\n" - " global volatile ulong8 * globalvolatileulong8p,\n" - " global volatile float8*globalvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n" - " global volatile uchar8* restrict globalvolatileuchar8restrictp,\n" - " global volatile short8 * restrict globalvolatileshort8restrictp,\n" - " global volatile ushort8*restrict globalvolatileushort8restrictp,\n" - " global volatile int8 *restrict globalvolatileint8restrictp,\n" - " global volatile uint8* restrict globalvolatileuint8restrictp,\n" - " global volatile long8 * restrict globalvolatilelong8restrictp,\n" - " global volatile ulong8*restrict globalvolatileulong8restrictp,\n" - " global volatile float8 *restrict globalvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n" - " global const volatile uchar8 * globalconstvolatileuchar8p,\n" - " global const volatile short8*globalconstvolatileshort8p,\n" - " global const volatile ushort8 *globalconstvolatileushort8p,\n" - " global const volatile int8* globalconstvolatileint8p,\n" - " global const volatile uint8 * globalconstvolatileuint8p,\n" - " global const volatile long8*globalconstvolatilelong8p,\n" - " global const volatile ulong8 *globalconstvolatileulong8p,\n" - " global const volatile float8* globalconstvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector8_restrict_p(global const volatile 
char8 * restrict globalconstvolatilechar8restrictp,\n" - " global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n" - " global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n" - " global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n" - " global const volatile int8 * restrict globalconstvolatileint8restrictp,\n" - " global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n" - " global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n" - " global const volatile ulong8* restrict globalconstvolatileulong8restrictp,\n" - " global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector8_p(local char8*localchar8p,\n" - " local uchar8 *localuchar8p,\n" - " local short8* localshort8p,\n" - " local ushort8 * localushort8p,\n" - " local int8*localint8p,\n" - " local uint8 *localuint8p,\n" - " local long8* locallong8p,\n" - " local ulong8 * localulong8p,\n" - " local float8*localfloat8p)\n" - "{}\n", - "\n" - "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n" - " local uchar8* restrict localuchar8restrictp,\n" - " local short8 * restrict localshort8restrictp,\n" - " local ushort8*restrict localushort8restrictp,\n" - " local int8 *restrict localint8restrictp,\n" - " local uint8* restrict localuint8restrictp,\n" - " local long8 * restrict locallong8restrictp,\n" - " local ulong8*restrict localulong8restrictp,\n" - " local float8 *restrict localfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n" - " local const uchar8 * localconstuchar8p,\n" - " local const short8*localconstshort8p,\n" - " local const ushort8 *localconstushort8p,\n" - " local const int8* localconstint8p,\n" - " local const uint8 * localconstuint8p,\n" - " local const long8*localconstlong8p,\n" - " local const ulong8 *localconstulong8p,\n" - " local const 
float8* localconstfloat8p)\n" - "{}\n", - "\n" - "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n" - " local const uchar8*restrict localconstuchar8restrictp,\n" - " local const short8 *restrict localconstshort8restrictp,\n" - " local const ushort8* restrict localconstushort8restrictp,\n" - " local const int8 * restrict localconstint8restrictp,\n" - " local const uint8*restrict localconstuint8restrictp,\n" - " local const long8 *restrict localconstlong8restrictp,\n" - " local const ulong8* restrict localconstulong8restrictp,\n" - " local const float8 * restrict localconstfloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n" - " local volatile uchar8 *localvolatileuchar8p,\n" - " local volatile short8* localvolatileshort8p,\n" - " local volatile ushort8 * localvolatileushort8p,\n" - " local volatile int8*localvolatileint8p,\n" - " local volatile uint8 *localvolatileuint8p,\n" - " local volatile long8* localvolatilelong8p,\n" - " local volatile ulong8 * localvolatileulong8p,\n" - " local volatile float8*localvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n" - " local volatile uchar8* restrict localvolatileuchar8restrictp,\n" - " local volatile short8 * restrict localvolatileshort8restrictp,\n" - " local volatile ushort8*restrict localvolatileushort8restrictp,\n" - " local volatile int8 *restrict localvolatileint8restrictp,\n" - " local volatile uint8* restrict localvolatileuint8restrictp,\n" - " local volatile long8 * restrict localvolatilelong8restrictp,\n" - " local volatile ulong8*restrict localvolatileulong8restrictp,\n" - " local volatile float8 *restrict localvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n" - " local const volatile uchar8 * 
localconstvolatileuchar8p,\n" - " local const volatile short8*localconstvolatileshort8p,\n" - " local const volatile ushort8 *localconstvolatileushort8p,\n" - " local const volatile int8* localconstvolatileint8p,\n" - " local const volatile uint8 * localconstvolatileuint8p,\n" - " local const volatile long8*localconstvolatilelong8p,\n" - " local const volatile ulong8 *localconstvolatileulong8p,\n" - " local const volatile float8* localconstvolatilefloat8p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n" - " local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n" - " local const volatile short8 *restrict localconstvolatileshort8restrictp,\n" - " local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n" - " local const volatile int8 * restrict localconstvolatileint8restrictp,\n" - " local const volatile uint8*restrict localconstvolatileuint8restrictp,\n" - " local const volatile long8 *restrict localconstvolatilelong8restrictp,\n" - " local const volatile ulong8* restrict localconstvolatileulong8restrictp,\n" - " local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n" - "{}\n", - "\n" - "kernel void vector8_d(char8 char8d,\n" - " uchar8 uchar8d,\n" - " short8 short8d,\n" - " ushort8 ushort8d,\n" - " int8 int8d,\n" - " uint8 uint8d,\n" - " long8 long8d,\n" - " ulong8 ulong8d,\n" - " float8 float8d)\n" - "{}\n", - "\n" - "kernel void const_vector8_d(const char8 constchar8d,\n" - " const uchar8 constuchar8d,\n" - " const short8 constshort8d,\n" - " const ushort8 constushort8d,\n" - " const int8 constint8d,\n" - " const uint8 constuint8d,\n" - " const long8 constlong8d,\n" - " const ulong8 constulong8d,\n" - " const float8 constfloat8d)\n" - "{}\n", - "\n" - "kernel void private_vector8_d(private char8 privatechar8d,\n" - " private uchar8 privateuchar8d,\n" - " private short8 privateshort8d,\n" - " private 
ushort8 privateushort8d,\n" - " private int8 privateint8d,\n" - " private uint8 privateuint8d,\n" - " private long8 privatelong8d,\n" - " private ulong8 privateulong8d,\n" - " private float8 privatefloat8d)\n" - "{}\n", - "\n" - "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n" - " private const uchar8 privateconstuchar8d,\n" - " private const short8 privateconstshort8d,\n" - " private const ushort8 privateconstushort8d,\n" - " private const int8 privateconstint8d,\n" - " private const uint8 privateconstuint8d,\n" - " private const long8 privateconstlong8d,\n" - " private const ulong8 privateconstulong8d,\n" - " private const float8 privateconstfloat8d)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p0(constant char16*constantchar16p,\n" - " constant uchar16 *constantuchar16p,\n" - " constant short16* constantshort16p,\n" - " constant ushort16 * constantushort16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p1(constant int16*constantint16p,\n" - " constant uint16 *constantuint16p,\n" - " constant long16* constantlong16p,\n" - " constant ulong16 * constantulong16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n" - " constant uchar16* restrict constantuchar16restrictp,\n" - " constant short16 * restrict constantshort16restrictp,\n" - " constant ushort16*restrict constantushort16restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n" - " constant uint16* restrict constantuint16restrictp,\n" - " constant long16 * restrict constantlong16restrictp,\n" - " constant ulong16*restrict constantulong16restrictp)\n" - "{}\n", - "\n" - "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_vector16_p(global 
char16*globalchar16p,\n" - " global uchar16 *globaluchar16p,\n" - " global short16* globalshort16p,\n" - " global ushort16 * globalushort16p,\n" - " global int16*globalint16p,\n" - " global uint16 *globaluint16p,\n" - " global long16* globallong16p,\n" - " global ulong16 * globalulong16p,\n" - " global float16*globalfloat16p)\n" - "{}\n", - "\n" - "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n" - " global uchar16* restrict globaluchar16restrictp,\n" - " global short16 * restrict globalshort16restrictp,\n" - " global ushort16*restrict globalushort16restrictp,\n" - " global int16 *restrict globalint16restrictp,\n" - " global uint16* restrict globaluint16restrictp,\n" - " global long16 * restrict globallong16restrictp,\n" - " global ulong16*restrict globalulong16restrictp,\n" - " global float16 *restrict globalfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n" - " global const uchar16 * globalconstuchar16p,\n" - " global const short16*globalconstshort16p,\n" - " global const ushort16 *globalconstushort16p,\n" - " global const int16* globalconstint16p,\n" - " global const uint16 * globalconstuint16p,\n" - " global const long16*globalconstlong16p,\n" - " global const ulong16 *globalconstulong16p,\n" - " global const float16* globalconstfloat16p)\n" - "{}\n", - "\n" - "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n" - " global const uchar16*restrict globalconstuchar16restrictp,\n" - " global const short16 *restrict globalconstshort16restrictp,\n" - " global const ushort16* restrict globalconstushort16restrictp,\n" - " global const int16 * restrict globalconstint16restrictp,\n" - " global const uint16*restrict globalconstuint16restrictp,\n" - " global const long16 *restrict globalconstlong16restrictp,\n" - " global const ulong16* restrict globalconstulong16restrictp,\n" - " global const float16 * 
restrict globalconstfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n" - " global volatile uchar16 *globalvolatileuchar16p,\n" - " global volatile short16* globalvolatileshort16p,\n" - " global volatile ushort16 * globalvolatileushort16p,\n" - " global volatile int16*globalvolatileint16p,\n" - " global volatile uint16 *globalvolatileuint16p,\n" - " global volatile long16* globalvolatilelong16p,\n" - " global volatile ulong16 * globalvolatileulong16p,\n" - " global volatile float16*globalvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n" - " global volatile uchar16* restrict globalvolatileuchar16restrictp,\n" - " global volatile short16 * restrict globalvolatileshort16restrictp,\n" - " global volatile ushort16*restrict globalvolatileushort16restrictp,\n" - " global volatile int16 *restrict globalvolatileint16restrictp,\n" - " global volatile uint16* restrict globalvolatileuint16restrictp,\n" - " global volatile long16 * restrict globalvolatilelong16restrictp,\n" - " global volatile ulong16*restrict globalvolatileulong16restrictp,\n" - " global volatile float16 *restrict globalvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n" - " global const volatile uchar16 * globalconstvolatileuchar16p,\n" - " global const volatile short16*globalconstvolatileshort16p,\n" - " global const volatile ushort16 *globalconstvolatileushort16p,\n" - " global const volatile int16* globalconstvolatileint16p,\n" - " global const volatile uint16 * globalconstvolatileuint16p,\n" - " global const volatile long16*globalconstvolatilelong16p,\n" - " global const volatile ulong16 *globalconstvolatileulong16p,\n" - " global const volatile float16* globalconstvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void 
global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n" - " global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n" - " global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n" - " global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n" - " global const volatile int16 * restrict globalconstvolatileint16restrictp,\n" - " global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n" - " global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n" - " global const volatile ulong16* restrict globalconstvolatileulong16restrictp,\n" - " global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_vector16_p(local char16*localchar16p,\n" - " local uchar16 *localuchar16p,\n" - " local short16* localshort16p,\n" - " local ushort16 * localushort16p,\n" - " local int16*localint16p,\n" - " local uint16 *localuint16p,\n" - " local long16* locallong16p,\n" - " local ulong16 * localulong16p,\n" - " local float16*localfloat16p)\n" - "{}\n", - "\n" - "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n" - " local uchar16* restrict localuchar16restrictp,\n" - " local short16 * restrict localshort16restrictp,\n" - " local ushort16*restrict localushort16restrictp,\n" - " local int16 *restrict localint16restrictp,\n" - " local uint16* restrict localuint16restrictp,\n" - " local long16 * restrict locallong16restrictp,\n" - " local ulong16*restrict localulong16restrictp,\n" - " local float16 *restrict localfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n" - " local const uchar16 * localconstuchar16p,\n" - " local const short16*localconstshort16p,\n" - " local const ushort16 *localconstushort16p,\n" - " local const int16* localconstint16p,\n" - " local const uint16 * 
localconstuint16p,\n" - " local const long16*localconstlong16p,\n" - " local const ulong16 *localconstulong16p,\n" - " local const float16* localconstfloat16p)\n" - "{}\n", - "\n" - "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n" - " local const uchar16*restrict localconstuchar16restrictp,\n" - " local const short16 *restrict localconstshort16restrictp,\n" - " local const ushort16* restrict localconstushort16restrictp,\n" - " local const int16 * restrict localconstint16restrictp,\n" - " local const uint16*restrict localconstuint16restrictp,\n" - " local const long16 *restrict localconstlong16restrictp,\n" - " local const ulong16* restrict localconstulong16restrictp,\n" - " local const float16 * restrict localconstfloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n" - " local volatile uchar16 *localvolatileuchar16p,\n" - " local volatile short16* localvolatileshort16p,\n" - " local volatile ushort16 * localvolatileushort16p,\n" - " local volatile int16*localvolatileint16p,\n" - " local volatile uint16 *localvolatileuint16p,\n" - " local volatile long16* localvolatilelong16p,\n" - " local volatile ulong16 * localvolatileulong16p,\n" - " local volatile float16*localvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n" - " local volatile uchar16* restrict localvolatileuchar16restrictp,\n" - " local volatile short16 * restrict localvolatileshort16restrictp,\n" - " local volatile ushort16*restrict localvolatileushort16restrictp,\n" - " local volatile int16 *restrict localvolatileint16restrictp,\n" - " local volatile uint16* restrict localvolatileuint16restrictp,\n" - " local volatile long16 * restrict localvolatilelong16restrictp,\n" - " local volatile ulong16*restrict localvolatileulong16restrictp,\n" - " local volatile float16 *restrict 
localvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n" - " local const volatile uchar16 * localconstvolatileuchar16p,\n" - " local const volatile short16*localconstvolatileshort16p,\n" - " local const volatile ushort16 *localconstvolatileushort16p,\n" - " local const volatile int16* localconstvolatileint16p,\n" - " local const volatile uint16 * localconstvolatileuint16p,\n" - " local const volatile long16*localconstvolatilelong16p,\n" - " local const volatile ulong16 *localconstvolatileulong16p,\n" - " local const volatile float16* localconstvolatilefloat16p)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n" - " local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n" - " local const volatile short16 *restrict localconstvolatileshort16restrictp,\n" - " local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n" - " local const volatile int16 * restrict localconstvolatileint16restrictp,\n" - " local const volatile uint16*restrict localconstvolatileuint16restrictp,\n" - " local const volatile long16 *restrict localconstvolatilelong16restrictp,\n" - " local const volatile ulong16* restrict localconstvolatileulong16restrictp,\n" - " local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n" - "{}\n", - "\n" - "kernel void vector16_d(char16 char16d,\n" - " uchar16 uchar16d,\n" - " short16 short16d,\n" - " ushort16 ushort16d,\n" - " int16 int16d,\n" - " uint16 uint16d,\n" - " long16 long16d,\n" - " ulong16 ulong16d,\n" - " float16 float16d)\n" - "{}\n", - "\n" - "kernel void const_vector16_d(const char16 constchar16d,\n" - " const uchar16 constuchar16d,\n" - " const short16 constshort16d,\n" - " const ushort16 constushort16d,\n" - " const int16 constint16d,\n" - " const uint16 constuint16d,\n" - " const long16 
constlong16d,\n" - " const ulong16 constulong16d,\n" - " const float16 constfloat16d)\n" - "{}\n", - "\n" - "kernel void private_vector16_d(private char16 privatechar16d,\n" - " private uchar16 privateuchar16d,\n" - " private short16 privateshort16d,\n" - " private ushort16 privateushort16d,\n" - " private int16 privateint16d,\n" - " private uint16 privateuint16d,\n" - " private long16 privatelong16d,\n" - " private ulong16 privateulong16d,\n" - " private float16 privatefloat16d)\n" - "{}\n", - "\n" - "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n" - " private const uchar16 privateconstuchar16d,\n" - " private const short16 privateconstshort16d,\n" - " private const ushort16 privateconstushort16d,\n" - " private const int16 privateconstint16d,\n" - " private const uint16 privateconstuint16d,\n" - " private const long16 privateconstlong16d,\n" - " private const ulong16 privateconstulong16d,\n" - " private const float16 privateconstfloat16d)\n" - "{}\n", - "\n" - "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n" - " constant struct struct_type *constantstructstruct_typep,\n" - " constant typedef_struct_type* constanttypedef_struct_typep,\n" - " constant union union_type * constantunionunion_typep)\n" - "{}\n", - "\n" - "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n" - " constant enum enum_type *constantenumenum_typep,\n" - " constant typedef_enum_type* constanttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n" - " constant struct struct_type*restrict constantstructstruct_typerestrictp,\n" - " constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n" - " constant union union_type* restrict constantunionunion_typerestrictp)\n" - "{}\n", - "\n" - "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict 
constanttypedef_union_typerestrictp,\n" - " constant enum enum_type*restrict constantenumenum_typerestrictp,\n" - " constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n" - " global struct struct_type *globalstructstruct_typep,\n" - " global typedef_struct_type* globaltypedef_struct_typep,\n" - " global union union_type * globalunionunion_typep,\n" - " global typedef_union_type*globaltypedef_union_typep,\n" - " global enum enum_type *globalenumenum_typep,\n" - " global typedef_enum_type* globaltypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n" - " global struct struct_type*restrict globalstructstruct_typerestrictp,\n" - " global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n" - " global union union_type* restrict globalunionunion_typerestrictp,\n" - " global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n" - " global enum enum_type*restrict globalenumenum_typerestrictp,\n" - " global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n" - " global const struct struct_type * globalconststructstruct_typep,\n" - " global const typedef_struct_type*globalconsttypedef_struct_typep,\n" - " global const union union_type *globalconstunionunion_typep,\n" - " global const typedef_union_type* globalconsttypedef_union_typep,\n" - " global const enum enum_type * globalconstenumenum_typep,\n" - " global const typedef_enum_type*globalconsttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n" - " global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n" - " global const typedef_struct_type 
* restrict globalconsttypedef_struct_typerestrictp,\n" - " global const union union_type*restrict globalconstunionunion_typerestrictp,\n" - " global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n" - " global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n" - " global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n" - " global volatile struct struct_type *globalvolatilestructstruct_typep,\n" - " global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n" - " global volatile union union_type * globalvolatileunionunion_typep,\n" - " global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n" - " global volatile enum enum_type *globalvolatileenumenum_typep,\n" - " global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n" - " global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n" - " global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n" - " global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n" - " global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n" - " global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n" - " global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n" - " global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n" - " global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n" - " global 
const volatile union union_type *globalconstvolatileunionunion_typep,\n" - " global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n" - " global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n" - " global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n" - " global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n" - " global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n" - " global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n" - " global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n" - " global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n" - " global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n" - " local struct struct_type *localstructstruct_typep,\n" - " local typedef_struct_type* localtypedef_struct_typep,\n" - " local union union_type * localunionunion_typep,\n" - " local typedef_union_type*localtypedef_union_typep,\n" - " local enum enum_type *localenumenum_typep,\n" - " local typedef_enum_type* localtypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n" - " local struct struct_type*restrict localstructstruct_typerestrictp,\n" - " local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n" - " local union union_type* restrict localunionunion_typerestrictp,\n" - " local typedef_union_type * restrict localtypedef_union_typerestrictp,\n" - " local enum enum_type*restrict 
localenumenum_typerestrictp,\n" - " local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n" - " local const struct struct_type * localconststructstruct_typep,\n" - " local const typedef_struct_type*localconsttypedef_struct_typep,\n" - " local const union union_type *localconstunionunion_typep,\n" - " local const typedef_union_type* localconsttypedef_union_typep,\n" - " local const enum enum_type * localconstenumenum_typep,\n" - " local const typedef_enum_type*localconsttypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n" - " local const struct struct_type* restrict localconststructstruct_typerestrictp,\n" - " local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n" - " local const union union_type*restrict localconstunionunion_typerestrictp,\n" - " local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n" - " local const enum enum_type* restrict localconstenumenum_typerestrictp,\n" - " local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n" - " local volatile struct struct_type *localvolatilestructstruct_typep,\n" - " local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n" - " local volatile union union_type * localvolatileunionunion_typep,\n" - " local volatile typedef_union_type*localvolatiletypedef_union_typep,\n" - " local volatile enum enum_type *localvolatileenumenum_typep,\n" - " local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n" - " local volatile struct 
struct_type*restrict localvolatilestructstruct_typerestrictp,\n" - " local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n" - " local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n" - " local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n" - " local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n" - " local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n" - " local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n" - " local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n" - " local const volatile union union_type *localconstvolatileunionunion_typep,\n" - " local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n" - " local const volatile enum enum_type * localconstvolatileenumenum_typep,\n" - " local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n" - "{}\n", - "\n" - "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n" - " local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n" - " local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n" - " local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n" - " local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n" - " local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n" - " local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n" - "{}\n", - "\n" - "kernel void derived_d(typedef_type typedef_typed,\n" 
- " struct struct_type structstruct_typed,\n" - " typedef_struct_type typedef_struct_typed,\n" - " union union_type unionunion_typed,\n" - " typedef_union_type typedef_union_typed,\n" - " enum enum_type enumenum_typed,\n" - " typedef_enum_type typedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void const_derived_d(const typedef_type consttypedef_typed,\n" - " const struct struct_type conststructstruct_typed,\n" - " const typedef_struct_type consttypedef_struct_typed,\n" - " const union union_type constunionunion_typed,\n" - " const typedef_union_type consttypedef_union_typed,\n" - " const enum enum_type constenumenum_typed,\n" - " const typedef_enum_type consttypedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n" - " private struct struct_type privatestructstruct_typed,\n" - " private typedef_struct_type privatetypedef_struct_typed,\n" - " private union union_type privateunionunion_typed,\n" - " private typedef_union_type privatetypedef_union_typed,\n" - " private enum enum_type privateenumenum_typed,\n" - " private typedef_enum_type privatetypedef_enum_typed)\n" - "{}\n", - "\n" - "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n" - " private const struct struct_type privateconststructstruct_typed,\n" - " private const typedef_struct_type privateconsttypedef_struct_typed,\n" - " private const union union_type privateconstunionunion_typed,\n" - " private const typedef_union_type privateconsttypedef_union_typed,\n" - " private const enum enum_type privateconstenumenum_typed,\n" - " private const typedef_enum_type privateconsttypedef_enum_typed)\n" - "{}\n", - "\n" -}; - -static const char * required_arg_info[][72] = { - // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 - { - "constant_scalar_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp", - 
(const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp", - NULL - }, - { - "constant_scalar_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp", - NULL - }, - { - "constant_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp", - NULL - }, - { - "constant_scalar_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp", - NULL - }, - { - "constant_scalar_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp", - NULL - }, - { - "constant_scalar_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp", - NULL - }, - { - "constant_scalar_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp", - NULL - }, - { - "constant_scalar_restrict_p3", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp", - NULL - }, - { - "global_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp", - NULL - }, - { - "global_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp", - NULL - }, - { - "global_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp", - NULL - }, - { - "global_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp", - NULL - }, - { - "global_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp", - NULL - }, - { - "global_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", 
"globalvolatileunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp", - NULL - }, - { - "global_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp", - NULL - }, - { - "global_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", 
"globalconstvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp", - NULL - }, - { - "local_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp", - NULL - }, - { - "local_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp", - NULL - }, - { - "local_const_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp", - NULL - }, - { - "local_const_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", 
"localconstucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp", - NULL - }, - { - "local_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp", - NULL - }, - { - "local_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", 
"localvolatileunsignedshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp", - NULL - }, - { - "local_const_volatile_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp", - NULL - }, - { - "local_const_volatile_scalar_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp", - NULL - }, - { - "scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd", - NULL - }, - { - "const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd", - NULL - }, - { - "private_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard", - 
(const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd", - NULL - }, - { - "private_const_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd", - NULL - }, - { - "constant_vector2_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p", - NULL - }, - { - "constant_vector2_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p", - NULL - }, - { - "constant_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p", - NULL - }, - { - "constant_vector2_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp", - NULL - }, - { - "constant_vector2_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp", - NULL - }, - { - "constant_vector2_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp", - NULL - }, - { - "global_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p", - NULL - }, - { - "global_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp", - NULL - }, - { - "global_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p", - NULL - }, - { - "global_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp", - NULL - }, - { - "global_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p", - NULL - }, - { - "global_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp", - NULL - }, - { - "global_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", 
"globalconstvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p", - NULL - }, - { - "global_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", 
"globalconstvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp", - NULL - }, - { - "local_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", 
"localint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p", - NULL - }, - { - "local_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp", - 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp", - NULL - }, - { - "local_const_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p", - NULL - }, - { - "local_const_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp", - NULL - }, - { - "local_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", 
"localvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p", - NULL - }, - { - "local_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp", - NULL - }, - { - "local_const_volatile_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p", - NULL - }, - { - "local_const_volatile_vector2_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp", - NULL - }, - { - "vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d", - NULL - }, - { - "const_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d", - NULL - }, - { - "private_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", 
"privateuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d", - NULL - }, - { - "private_const_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d", - NULL - }, - { - "constant_vector3_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p", - NULL - }, - { - "constant_vector3_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p", - NULL 
- }, - { - "constant_vector3_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p", - NULL - }, - { - "constant_vector3_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp", - NULL - }, - { - "constant_vector3_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp", - NULL - }, - { - "constant_vector3_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp", - NULL - }, - { - "global_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p", - NULL - }, - { - "global_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp", - NULL - }, - { - "global_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p", - NULL - }, - { - "global_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp", - NULL - }, - { - "global_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p", - NULL - }, - { - "global_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp", - NULL - }, - { - 
"global_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p", - NULL - }, - { - "global_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp", 
- NULL - }, - { - "local_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p", - NULL - }, - { - "local_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp", - NULL - }, - { - "local_const_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p", - NULL - }, - { - "local_const_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp", - NULL - }, - { - "local_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p", - NULL - }, - { - "local_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp", - NULL - }, - { - "local_const_volatile_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p", - NULL - }, - { - "local_const_volatile_vector3_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp", - NULL - }, - { - "vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d", - NULL - }, - { - "const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d", - NULL - }, - { - "private_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", 
"privatefloat3d", - NULL - }, - { - "private_const_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d", - NULL - }, - { - "constant_vector4_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p", - NULL - }, - { - "constant_vector4_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p", - NULL - }, - { - "constant_vector4_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p", - NULL - }, - { - "constant_vector4_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp", - NULL - }, - { - "constant_vector4_restrict_p1", - (const 
char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp", - NULL - }, - { - "constant_vector4_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp", - NULL - }, - { - "global_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p", - NULL - }, - { - "global_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", 
"globalfloat4restrictp", - NULL - }, - { - "global_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p", - NULL - }, - { - "global_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp", - NULL - }, - { - "global_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p", - NULL - }, - { - "global_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", 
"globalvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp", - NULL - }, - { - "global_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p", - NULL - }, - { - "global_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp", - NULL - }, - { - "local_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p", - NULL - }, - { - "local_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp", - NULL - }, - { - "local_const_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), 
"char4*", "localconstchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p", - NULL - }, - { - "local_const_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp", - NULL - }, - { - "local_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p", - NULL - }, - { - "local_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp", - NULL - }, - { - "local_const_volatile_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p", - NULL - }, - { - "local_const_volatile_vector4_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", 
"localconstvolatilelong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp", - NULL - }, - { - "vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d", - NULL - }, - { - "const_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d", - NULL - }, - { - "private_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d", - NULL - }, - { - "private_const_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d", - NULL - }, - { - "constant_vector8_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p", - NULL - }, - { - "constant_vector8_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p", - NULL - }, - { - "constant_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p", - NULL - }, - { - "constant_vector8_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp", - NULL - }, - { - "constant_vector8_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp", - NULL - }, - { - "constant_vector8_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp", - NULL - }, - { - "global_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p", - NULL - }, - { - "global_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp", - NULL - }, - { - "global_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p", - NULL - }, - { - "global_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp", - NULL - }, - { - "global_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p", - NULL - }, - { - "global_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp", - NULL - }, - { - "global_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", 
"globalconstvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p", - NULL - }, - { - "global_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp", - NULL - }, - { - "local_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p", - NULL - }, - { - "local_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp", - NULL - }, - { - "local_const_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p", - NULL - }, - { - "local_const_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp", - NULL - }, - { - "local_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p", - NULL - }, - { - "local_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp", - NULL - }, - { - "local_const_volatile_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p", - NULL - }, - { - "local_const_volatile_vector8_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp", - NULL - }, - { - "vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d", - NULL - }, - { - "const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d", - NULL - }, - { - "private_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d", - NULL - }, - { - "private_const_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d", - NULL - }, - { - "constant_vector16_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p", - NULL - }, - { - "constant_vector16_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p", - NULL - }, - { - "constant_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p", - NULL - }, - { - "constant_vector16_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp", - NULL - }, - { - "constant_vector16_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp", - NULL - }, - { - "constant_vector16_restrict_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp", - NULL - }, - { - "global_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p", - NULL - }, - { - "global_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp", - NULL - }, - { - "global_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p", - NULL - }, - { - "global_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp", - NULL - }, - { - "global_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p", - (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p", - NULL - }, - { - "global_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp", - NULL - }, - { - "global_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", 
"globalconstvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p", - NULL - }, - { - "global_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp", - NULL - }, - { - "local_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p", - NULL - }, - { - "local_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp", - NULL - }, - { - "local_const_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p", - NULL - }, - { - "local_const_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp", - NULL - }, - { - "local_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", 
"localvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p", - NULL - }, - { - "local_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", 
"localvolatilelong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp", - NULL - }, - { - "local_const_volatile_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p", - NULL - }, - { - "local_const_volatile_vector16_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp", - (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp", - NULL - }, - { - "vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d", - NULL - }, - { - "const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d", - NULL - }, - { - "private_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d", - NULL - }, - { - "private_const_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d", - NULL - }, - { - "constant_derived_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep", - NULL - }, - { - "constant_derived_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep", - NULL - }, - { - "constant_derived_restrict_p0", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp", - NULL - }, - { - "constant_derived_restrict_p1", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp", - NULL - }, - { - "global_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"union union_type*", "globalunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep", - NULL - }, - { - "global_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp", - NULL - }, - { - "global_const_derived_p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep", - NULL - }, - { - "global_const_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp", - NULL - }, - { - "global_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", 
"globalvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep", - NULL - }, - { - "global_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "global_const_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep", - NULL - }, - { - "global_const_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp", - 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "local_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", 
"localunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep", - NULL - }, - { - "local_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp", - NULL - }, - { - "local_const_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep", - NULL - }, - { - "local_const_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp", - NULL - }, - { - "local_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep", - NULL - }, - { - "local_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "local_const_volatile_derived_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", 
"localconstvolatiletypedef_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep", - NULL - }, - { - "local_const_volatile_derived_restrict_p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp", - NULL - }, - { - "derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed", - NULL - }, - { - "const_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed", - NULL - }, - { - "private_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed", - NULL - }, - { - "private_const_derived_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed", - NULL - }, -}; - -// Support for optional image data type -static const char * image_kernel_args[] = { - "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n" - "kernel void image_d(read_only image2d_t image2d_td0,\n" - " write_only image2d_t image2d_td1,\n" - " read_only image3d_t image3d_td2,\n" - " write_only image3d_t image3d_td3,\n" - " read_only image2d_array_t image2d_array_td4,\n" - " write_only image2d_array_t image2d_array_td5,\n" - " read_only image1d_t image1d_td6,\n" - " write_only image1d_t image1d_td7,\n" - " read_only image1d_buffer_t image1d_buffer_td8,\n" - " write_only image1d_buffer_t image1d_buffer_td9,\n" - " read_only image1d_array_t image1d_array_td10,\n" - " write_only image1d_array_t image1d_array_td11,\n" - " sampler_t sampler_td12)\n" - "{}\n", - "\n" -}; - -static const char * image_arg_info[][67] = { - { - "image_d", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4", - (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", - NULL - }, -}; - -// Support for optional double data type -static const char * double_kernel_args[] = { - "kernel void double_scalar_p(constant double*constantdoublep,\n" - " constant double *restrict constantdoublerestrictp,\n" - " global double*globaldoublep,\n" - " global double *restrict globaldoublerestrictp,\n" - " global const double* globalconstdoublep,\n" - " global const double * restrict globalconstdoublerestrictp,\n" - " global volatile double*globalvolatiledoublep,\n" - " global volatile double *restrict globalvolatiledoublerestrictp,\n" - " global const volatile double* globalconstvolatiledoublep)\n" - "{}\n", - "\n" - "kernel 
void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" - " local double*localdoublep,\n" - " local double *restrict localdoublerestrictp,\n" - " local const double* localconstdoublep,\n" - " local const double * restrict localconstdoublerestrictp,\n" - " local volatile double*localvolatiledoublep,\n" - " local volatile double *restrict localvolatiledoublerestrictp,\n" - " local const volatile double* localconstvolatiledoublep,\n" - " local const volatile double * restrict localconstvolatiledoublerestrictp)\n" - "{}\n", - "\n" - "kernel void double_scalar_d(double doubled,\n" - " const double constdoubled,\n" - " private double privatedoubled,\n" - " private const double privateconstdoubled)\n" - "{}\n", - "\n" - "kernel void double_vector2_p(constant double2*constantdouble2p,\n" - " constant double2 *restrict constantdouble2restrictp,\n" - " global double2*globaldouble2p,\n" - " global double2 *restrict globaldouble2restrictp,\n" - " global const double2* globalconstdouble2p,\n" - " global const double2 * restrict globalconstdouble2restrictp,\n" - " global volatile double2*globalvolatiledouble2p,\n" - " global volatile double2 *restrict globalvolatiledouble2restrictp,\n" - " global const volatile double2* globalconstvolatiledouble2p)\n" - "{}\n", - "\n" - "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n" - " local double2*localdouble2p,\n" - " local double2 *restrict localdouble2restrictp,\n" - " local const double2* localconstdouble2p,\n" - " local const double2 * restrict localconstdouble2restrictp,\n" - " local volatile double2*localvolatiledouble2p,\n" - " local volatile double2 *restrict localvolatiledouble2restrictp,\n" - " local const volatile double2* localconstvolatiledouble2p,\n" - " local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector2_d(double2 double2d,\n" - " const double2 
constdouble2d,\n" - " private double2 privatedouble2d,\n" - " private const double2 privateconstdouble2d)\n" - "{}\n", - "\n" - "kernel void double_vector3_p(constant double3*constantdouble3p,\n" - " constant double3 *restrict constantdouble3restrictp,\n" - " global double3*globaldouble3p,\n" - " global double3 *restrict globaldouble3restrictp,\n" - " global const double3* globalconstdouble3p,\n" - " global const double3 * restrict globalconstdouble3restrictp,\n" - " global volatile double3*globalvolatiledouble3p,\n" - " global volatile double3 *restrict globalvolatiledouble3restrictp,\n" - " global const volatile double3* globalconstvolatiledouble3p)\n" - "{}\n", - "\n" - "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n" - " local double3*localdouble3p,\n" - " local double3 *restrict localdouble3restrictp,\n" - " local const double3* localconstdouble3p,\n" - " local const double3 * restrict localconstdouble3restrictp,\n" - " local volatile double3*localvolatiledouble3p,\n" - " local volatile double3 *restrict localvolatiledouble3restrictp,\n" - " local const volatile double3* localconstvolatiledouble3p,\n" - " local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector3_d(double3 double3d,\n" - " const double3 constdouble3d,\n" - " private double3 privatedouble3d,\n" - " private const double3 privateconstdouble3d)\n" - "{}\n", - "\n" - "kernel void double_vector4_p(constant double4*constantdouble4p,\n" - " constant double4 *restrict constantdouble4restrictp,\n" - " global double4*globaldouble4p,\n" - " global double4 *restrict globaldouble4restrictp,\n" - " global const double4* globalconstdouble4p,\n" - " global const double4 * restrict globalconstdouble4restrictp,\n" - " global volatile double4*globalvolatiledouble4p,\n" - " global volatile double4 *restrict globalvolatiledouble4restrictp,\n" - " global const volatile double4* 
globalconstvolatiledouble4p)\n" - "{}\n", - "\n" - "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n" - " local double4*localdouble4p,\n" - " local double4 *restrict localdouble4restrictp,\n" - " local const double4* localconstdouble4p,\n" - " local const double4 * restrict localconstdouble4restrictp,\n" - " local volatile double4*localvolatiledouble4p,\n" - " local volatile double4 *restrict localvolatiledouble4restrictp,\n" - " local const volatile double4* localconstvolatiledouble4p,\n" - " local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector4_d(double4 double4d,\n" - " const double4 constdouble4d,\n" - " private double4 privatedouble4d,\n" - " private const double4 privateconstdouble4d)\n" - "{}\n", - "\n" - "kernel void double_vector8_p(constant double8*constantdouble8p,\n" - " constant double8 *restrict constantdouble8restrictp,\n" - " global double8*globaldouble8p,\n" - " global double8 *restrict globaldouble8restrictp,\n" - " global const double8* globalconstdouble8p,\n" - " global const double8 * restrict globalconstdouble8restrictp,\n" - " global volatile double8*globalvolatiledouble8p,\n" - " global volatile double8 *restrict globalvolatiledouble8restrictp,\n" - " global const volatile double8* globalconstvolatiledouble8p)\n" - "{}\n", - "\n" - "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n" - " local double8*localdouble8p,\n" - " local double8 *restrict localdouble8restrictp,\n" - " local const double8* localconstdouble8p,\n" - " local const double8 * restrict localconstdouble8restrictp,\n" - " local volatile double8*localvolatiledouble8p,\n" - " local volatile double8 *restrict localvolatiledouble8restrictp,\n" - " local const volatile double8* localconstvolatiledouble8p,\n" - " local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n" - 
"{}\n", - "\n" - "kernel void double_vector8_d(double8 double8d,\n" - " const double8 constdouble8d,\n" - " private double8 privatedouble8d,\n" - " private const double8 privateconstdouble8d)\n" - "{}\n", - "\n" - "kernel void double_vector16_p(constant double16*constantdouble16p,\n" - " constant double16 *restrict constantdouble16restrictp,\n" - " global double16*globaldouble16p,\n" - " global double16 *restrict globaldouble16restrictp,\n" - " global const double16* globalconstdouble16p,\n" - " global const double16 * restrict globalconstdouble16restrictp,\n" - " global volatile double16*globalvolatiledouble16p,\n" - " global volatile double16 *restrict globalvolatiledouble16restrictp,\n" - " global const volatile double16* globalconstvolatiledouble16p)\n" - "{}\n", - "\n" - "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n" - " local double16*localdouble16p,\n" - " local double16 *restrict localdouble16restrictp,\n" - " local const double16* localconstdouble16p,\n" - " local const double16 * restrict localconstdouble16restrictp,\n" - " local volatile double16*localvolatiledouble16p,\n" - " local volatile double16 *restrict localvolatiledouble16restrictp,\n" - " local const volatile double16* localconstvolatiledouble16p,\n" - " local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n" - "{}\n", - "\n" - "kernel void double_vector16_d(double16 double16d,\n" - " const double16 constdouble16d,\n" - " private double16 privatedouble16d,\n" - " private const double16 privateconstdouble16d)\n" - "{}\n", - "\n" -}; - -static const char * double_arg_info[][77] = { - { - "double_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep", - NULL - }, - { - "double_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp", - NULL - }, - { - "double_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled", - NULL - }, - { - "double_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p", - (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p", - NULL - }, - { - "double_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp", - NULL - }, - { - "double_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d", - NULL - }, - { - "double_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p", - NULL - }, - { - "double_vector3_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", 
"localdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp", - NULL - }, - { - "double_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", 
"privateconstdouble3d", - NULL - }, - { - "double_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p", - NULL - }, - { - "double_vector4_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp", - (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp", - NULL - }, - { - "double_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d", - NULL - }, - { - "double_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p", - NULL - }, - { - "double_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp", - NULL - }, - { - "double_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d", - NULL - }, - { - "double_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p", - 
NULL - }, - { - "double_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp", - NULL - }, - { - "double_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d", - NULL - }, -}; - - -// Support for optional half data type -static const char * half_kernel_args[] = { - "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" - "\n" - "kernel void half_scalar_p(constant half*constanthalfp,\n" - " constant half *restrict constanthalfrestrictp,\n" - " global half*globalhalfp,\n" - " global half *restrict globalhalfrestrictp,\n" - " global const half* globalconsthalfp,\n" - " global const half * restrict globalconsthalfrestrictp,\n" - " global volatile half*globalvolatilehalfp,\n" - " global volatile half *restrict globalvolatilehalfrestrictp,\n" - " global const volatile half* globalconstvolatilehalfp)\n" - "{}\n", - "\n" - "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n" - " local half*localhalfp,\n" - " local half *restrict localhalfrestrictp,\n" - " local const half* localconsthalfp,\n" - " local const half * restrict localconsthalfrestrictp,\n" - " local volatile half*localvolatilehalfp,\n" - " local volatile half *restrict localvolatilehalfrestrictp,\n" - " local const volatile half* localconstvolatilehalfp,\n" - " local const volatile half * restrict localconstvolatilehalfrestrictp)\n" - "{}\n", - "\n" - "kernel void half_scalar_d(half halfd,\n" - " const half consthalfd,\n" - " private half privatehalfd,\n" - " private const half privateconsthalfd)\n" - "{}\n", - "\n" - "kernel void half_vector2_p(constant half2*constanthalf2p,\n" - " constant half2 *restrict constanthalf2restrictp,\n" - " global 
half2*globalhalf2p,\n" - " global half2 *restrict globalhalf2restrictp,\n" - " global const half2* globalconsthalf2p,\n" - " global const half2 * restrict globalconsthalf2restrictp,\n" - " global volatile half2*globalvolatilehalf2p,\n" - " global volatile half2 *restrict globalvolatilehalf2restrictp,\n" - " global const volatile half2* globalconstvolatilehalf2p)\n" - "{}\n", - "\n" - "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n" - " local half2*localhalf2p,\n" - " local half2 *restrict localhalf2restrictp,\n" - " local const half2* localconsthalf2p,\n" - " local const half2 * restrict localconsthalf2restrictp,\n" - " local volatile half2*localvolatilehalf2p,\n" - " local volatile half2 *restrict localvolatilehalf2restrictp,\n" - " local const volatile half2* localconstvolatilehalf2p,\n" - " local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector2_d(half2 half2d,\n" - " const half2 consthalf2d,\n" - " private half2 privatehalf2d,\n" - " private const half2 privateconsthalf2d)\n" - "{}\n", - "\n" - "kernel void half_vector3_p(constant half3*constanthalf3p,\n" - " constant half3 *restrict constanthalf3restrictp,\n" - " global half3*globalhalf3p,\n" - " global half3 *restrict globalhalf3restrictp,\n" - " global const half3* globalconsthalf3p,\n" - " global const half3 * restrict globalconsthalf3restrictp,\n" - " global volatile half3*globalvolatilehalf3p,\n" - " global volatile half3 *restrict globalvolatilehalf3restrictp,\n" - " global const volatile half3* globalconstvolatilehalf3p)\n" - "{}\n", - "\n" - "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n" - " local half3*localhalf3p,\n" - " local half3 *restrict localhalf3restrictp,\n" - " local const half3* localconsthalf3p,\n" - " local const half3 * restrict localconsthalf3restrictp,\n" - " local volatile half3*localvolatilehalf3p,\n" - " 
local volatile half3 *restrict localvolatilehalf3restrictp,\n" - " local const volatile half3* localconstvolatilehalf3p,\n" - " local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector3_d(half3 half3d,\n" - " const half3 consthalf3d,\n" - " private half3 privatehalf3d,\n" - " private const half3 privateconsthalf3d)\n" - "{}\n", - "\n" - "kernel void half_vector4_p(constant half4*constanthalf4p,\n" - " constant half4 *restrict constanthalf4restrictp,\n" - " global half4*globalhalf4p,\n" - " global half4 *restrict globalhalf4restrictp,\n" - " global const half4* globalconsthalf4p,\n" - " global const half4 * restrict globalconsthalf4restrictp,\n" - " global volatile half4*globalvolatilehalf4p,\n" - " global volatile half4 *restrict globalvolatilehalf4restrictp,\n" - " global const volatile half4* globalconstvolatilehalf4p)\n" - "{}\n", - "\n" - "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n" - " local half4*localhalf4p,\n" - " local half4 *restrict localhalf4restrictp,\n" - " local const half4* localconsthalf4p,\n" - " local const half4 * restrict localconsthalf4restrictp,\n" - " local volatile half4*localvolatilehalf4p,\n" - " local volatile half4 *restrict localvolatilehalf4restrictp,\n" - " local const volatile half4* localconstvolatilehalf4p,\n" - " local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector4_d(half4 half4d,\n" - " const half4 consthalf4d,\n" - " private half4 privatehalf4d,\n" - " private const half4 privateconsthalf4d)\n" - "{}\n", - "\n" - "kernel void half_vector8_p(constant half8*constanthalf8p,\n" - " constant half8 *restrict constanthalf8restrictp,\n" - " global half8*globalhalf8p,\n" - " global half8 *restrict globalhalf8restrictp,\n" - " global const half8* globalconsthalf8p,\n" - " global const half8 * restrict globalconsthalf8restrictp,\n" - " global volatile 
half8*globalvolatilehalf8p,\n" - " global volatile half8 *restrict globalvolatilehalf8restrictp,\n" - " global const volatile half8* globalconstvolatilehalf8p)\n" - "{}\n", - "\n" - "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n" - " local half8*localhalf8p,\n" - " local half8 *restrict localhalf8restrictp,\n" - " local const half8* localconsthalf8p,\n" - " local const half8 * restrict localconsthalf8restrictp,\n" - " local volatile half8*localvolatilehalf8p,\n" - " local volatile half8 *restrict localvolatilehalf8restrictp,\n" - " local const volatile half8* localconstvolatilehalf8p,\n" - " local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector8_d(half8 half8d,\n" - " const half8 consthalf8d,\n" - " private half8 privatehalf8d,\n" - " private const half8 privateconsthalf8d)\n" - "{}\n", - "\n" - "kernel void half_vector16_p(constant half16*constanthalf16p,\n" - " constant half16 *restrict constanthalf16restrictp,\n" - " global half16*globalhalf16p,\n" - " global half16 *restrict globalhalf16restrictp,\n" - " global const half16* globalconsthalf16p,\n" - " global const half16 * restrict globalconsthalf16restrictp,\n" - " global volatile half16*globalvolatilehalf16p,\n" - " global volatile half16 *restrict globalvolatilehalf16restrictp,\n" - " global const volatile half16* globalconstvolatilehalf16p)\n" - "{}\n", - "\n" - "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n" - " local half16*localhalf16p,\n" - " local half16 *restrict localhalf16restrictp,\n" - " local const half16* localconsthalf16p,\n" - " local const half16 * restrict localconsthalf16restrictp,\n" - " local volatile half16*localvolatilehalf16p,\n" - " local volatile half16 *restrict localvolatilehalf16restrictp,\n" - " local const volatile half16* localconstvolatilehalf16p,\n" - " local const volatile half16 * restrict 
localconstvolatilehalf16restrictp)\n" - "{}\n", - "\n" - "kernel void half_vector16_d(half16 half16d,\n" - " const half16 consthalf16d,\n" - " private half16 privatehalf16d,\n" - " private const half16 privateconsthalf16d)\n" - "{}\n", - "\n" -}; - -static const char * half_arg_info[][77] = { - { - "half_scalar_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp", - NULL - }, - { - "half_scalar_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp", - NULL - }, - { - "half_scalar_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "halfd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "consthalfd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privatehalfd", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privateconsthalfd", - NULL - }, - { - "half_vector2_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p", - NULL - }, - { - "half_vector2_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp", - NULL - }, - { - "half_vector2_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "half2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "consthalf2d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privatehalf2d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privateconsthalf2d", - NULL - }, - { - "half_vector3_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p", - NULL - }, - { - "half_vector3_p2", - (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp", - NULL - }, - { - "half_vector3_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "half3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", 
"consthalf3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privatehalf3d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privateconsthalf3d", - NULL - }, - { - "half_vector4_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p", - NULL - }, - { - "half_vector4_p2", - 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp", - NULL - }, - { - "half_vector4_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "half4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"half4", "consthalf4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privatehalf4d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privateconsthalf4d", - NULL - }, - { - "half_vector8_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p", - NULL - }, - { - 
"half_vector8_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp", - NULL - }, - { - "half_vector8_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "half8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "half8", "consthalf8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privatehalf8d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privateconsthalf8d", - NULL - }, - { - "half_vector16_p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", 
"globalconstvolatilehalf16p", - NULL - }, - { - "half_vector16_p2", - (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p", - (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp", - NULL - }, - { - "half_vector16_d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "half16d", - (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "consthalf16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privatehalf16d", - (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privateconsthalf16d", - NULL - }, -}; - - -template -int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) { - - const size_t max_name_len = 512; - cl_char name[ max_name_len ]; - cl_uint arg_count, numArgs; - size_t i, j, size; - int error; - - clProgramWrapper program = - clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error); - if ( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create required arguments kernel program" ); - return -1; - } - - // Compile the program - log_info( "Building kernels...\n" ); - clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL ); - - // check for build errors and exit if things didn't work - size_t size_ret; - cl_build_status build_status; - error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret); - test_error( error, "Unable to query build status" ); - if (build_status == CL_BUILD_ERROR) { - printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); - error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); - test_error( error, "Unable to get build log size" ); - char *build_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); - test_error( error, "Unable to get build log" ); - printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log); - 
printf("CL_BUILD_ERROR. exiting\n"); - free(build_log); - return -1; - } - - // Lookup the number of kernels in the program. - log_info( "Testing kernels...\n" ); - size_t total_kernels = 0; - error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL ); - test_error( error, "Unable to get program info num kernels" ); - - if ( total_kernels != total_kernels_in_program ) - { - print_error( error, "Program did not build all kernels" ); - return -1; - } - - // Lookup the kernel names. - size_t kernel_names_len = 0; - error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len ); - test_error( error, "Unable to get length of kernel names list." ); - - size_t expected_kernel_names_len = 0; - for ( i = 0; i < total_kernels; ++i ) - { - expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] ); - } - if ( kernel_names_len != expected_kernel_names_len ) - { - log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len ); - return -1; - } - - const size_t len = ( kernel_names_len + 1 ) * sizeof( char ); - char* kernel_names = (char*) malloc( len ); - error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len ); - test_error( error, "Unable to get kernel names list." ); - - // Check to see if the kernel name array is null terminated. - if ( kernel_names[ kernel_names_len - 1 ] != '\0' ) - { - free( kernel_names ); - print_error( error, "Kernel name list was not null terminated" ); - return -1; - } - - // Check to see if the correct kernel name string was returned. - // Does the string contain each expected kernel name? 
- for ( i = 0; i < total_kernels; ++i ) - if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) ) - break; - if ( i != total_kernels ) - { - log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] ); - free( kernel_names ); - return -1; - } - - // Are the kernel names delimited by ';'? - if ( !strtok( kernel_names, ";" ) ) - { - error = -1; - } - else - { - for ( i = 1; i < total_kernels; ++i ) - { - if ( !strtok( NULL, ";" ) ) - { - error = -1; - } - } - } - if ( error ) - { - log_error( "Kernel names string was not properly delimited by ';'\n" ); - free( kernel_names ); - return -1; - } - free( kernel_names ); - - // Create kernel objects and query them. - int rc = 0; - for ( i = 0; i < total_kernels; ++i ) - { - int kernel_rc = 0; - const char* kernel_name = arg_info[ i ][ 0 ]; - clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error); - if( kernel == NULL || error != CL_SUCCESS ) - { - log_error( "ERROR: Could not get kernel: %s\n", kernel_name ); - kernel_rc = -1; - } - - if(kernel_rc == 0) - { - // Determine the expected number of arguments. - arg_count = 0; - while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL) - ++arg_count; - - // Try to get the number of arguments. 
- error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size ); - test_error( error, "Unable to get kernel arg count param size" ); - if( size != sizeof( numArgs ) ) - { - log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name ); - kernel_rc = -1; - } - } - - - if(kernel_rc == 0) - { - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); - test_error( error, "Unable to get kernel arg count" ); - if( numArgs != arg_count ) - { - log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name ); - kernel_rc = -1; - } - } - - if(kernel_rc == 0) - { - for ( j = 0; j < numArgs; ++j ) - { - - int arg_rc = 0; - cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ]; - cl_kernel_arg_access_qualifier expected_access_qualifier = (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ]; - cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ]; - const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ]; - const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ]; - - // Try to get the address qualifier of each argument. 
- cl_kernel_arg_address_qualifier address_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size ); - test_error( error, "Unable to get argument address qualifier" ); - error = (address_qualifier != expected_address_qualifier); - if ( error ) - { - log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier ); - arg_rc = -1; - } - - // Try to get the access qualifier of each argument. - cl_kernel_arg_access_qualifier access_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size ); - test_error( error, "Unable to get argument access qualifier" ); - error = (access_qualifier != expected_access_qualifier); - if ( error ) - { - log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier ); - arg_rc = -1; - } - - // Try to get the type qualifier of each argument. - cl_kernel_arg_type_qualifier arg_type_qualifier = 0; - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size ); - test_error( error, "Unable to get argument type qualifier" ); - error = (arg_type_qualifier != expected_type_qualifier); - if ( error ) - { - log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier ); - arg_rc = -1; - } - - // Try to get the type of each argument. 
- memset( name, 0, max_name_len ); - error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size ); - test_error( error, "Unable to get argument type name" ); - error = strcmp( (const char*) name, expected_type_name ); - if ( error ) - { - log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name ); - arg_rc = -1; - } - - // Try to get the name of each argument. - memset( name, 0, max_name_len ); - error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size ); - test_error( error, "Unable to get argument name" ); - error = strcmp( (const char*) name, expected_arg_name ); - if ( error ) - { - log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name ); - arg_rc = -1; - } - - if(arg_rc != 0) { - kernel_rc = -1; - } - } - } - - //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? 
"passed" : "failed" ); - if(kernel_rc != 0) { - rc = -1; - } - } - return rc; -} - - -int test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - size_t size; - int error; - - cl_bool supports_double = 0; // assume not - cl_bool supports_half = 0; // assume not - cl_bool supports_images = 0; // assume not - - // Check if this device supports images - error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); - - if (supports_images) { - log_info(" o Device supports images\n"); - log_info(" o Expecting SUCCESS when testing image kernel arguments.\n"); - } - else { - log_info(" o Device lacks image support\n"); - log_info(" o Not testing image kernel arguments.\n"); - } - - if (is_extension_available(deviceID, "cl_khr_fp64")) { - log_info(" o Device claims extension 'cl_khr_fp64'\n"); - log_info(" o Expecting SUCCESS when testing double kernel arguments.\n"); - supports_double = 1; - } else { - cl_device_fp_config double_fp_config; - error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); - if (double_fp_config != 0) - supports_double = 1; - else { - log_info(" o Device lacks extension 'cl_khr_fp64'\n"); - log_info(" o Not testing double kernel arguments.\n"); - supports_double = 0; - } - } - - if (is_extension_available(deviceID, "cl_khr_fp16")) { - log_info(" o Device claims extension 'cl_khr_fp16'\n"); - log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n"); - supports_half = 1; - } else { - log_info(" o Device lacks extension 'cl_khr_fp16'\n"); - log_info(" o Not testing halfn* kernel arguments.\n"); - supports_half = 0; - } - - - int test_failed = 0; - - // Now create a test program using required arguments - 
log_info("Testing required kernel arguments...\n"); - error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0])); - test_failed = (error) ? -1 : test_failed; - - if ( supports_images ) { - log_info("Testing optional image arguments...\n"); - error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0])); - test_failed = (error) ? -1 : test_failed; - } - - if ( supports_double ) { - log_info("Testing optional double arguments...\n"); - error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0])); - test_failed = (error) ? -1 : test_failed; - } - - if ( supports_half ) { - log_info("Testing optional half arguments...\n"); - error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0])); - test_failed = (error) ? -1 : test_failed; - } - - return test_failed; -} - - diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp new file mode 100644 index 0000000000..2e4e0a7f19 --- /dev/null +++ b/test_conformance/api/test_kernel_attributes.cpp @@ -0,0 +1,339 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include +#include +#include "procs.h" +#include "harness/errorHelpers.h" +#include "harness/typeWrappers.h" +#include "harness/parseParameters.h" + +using KernelAttributes = std::vector; + +static std::string generate_kernel_source(const KernelAttributes& attributes) +{ + std::string kernel; + for (auto attribute : attributes) + { + kernel += "__attribute__((" + attribute + "))\n"; + } + kernel += "__kernel void test_kernel(){}"; + return kernel; +} + + +using AttributePermutations = std::vector; + +// The following combinations have been chosen as they place each of the +// attribute types in the different orders that they can occur. While distinct +// permutations would provide a complete overview of the API the sheer number of +// combinations increases the runtime of this test by an unreasonable amount +AttributePermutations vect_tests; +AttributePermutations work_tests; +AttributePermutations reqd_tests; + +AttributePermutations vect_reqd_tests; +AttributePermutations work_vect_tests; +AttributePermutations reqd_work_tests; + +AttributePermutations vect_work_reqd_tests; +AttributePermutations work_reqd_vect_tests; +AttributePermutations reqd_vect_work_tests; + + +// Generate a vector with vec_type_hint() so that it can be used to +// generate different kernels +static KernelAttributes generate_vec_type_hint_data(cl_device_id deviceID) +{ + KernelAttributes vec_type_hint_data; + // TODO Test for signed vectors (char/short/int/etc) + std::vector vector_types = { "uchar", "ushort", "uint", + "float" }; + if (gHasLong) + { + vector_types.push_back("ulong"); + } + if (device_supports_half(deviceID)) + { + vector_types.push_back("half"); + } + if (device_supports_double(deviceID)) + { + vector_types.push_back("double"); + } + + const auto vector_sizes = { "2", "3", "4", "8", "16" }; + for (auto type : vector_types) + { + for (auto size 
: vector_sizes) + { + vec_type_hint_data.push_back("vec_type_hint(" + type + size + ")"); + } + } + return vec_type_hint_data; +} + + +struct WorkGroupDimensions +{ + int x; + int y; + int z; +}; + +// Generate vectors to store reqd_work_group_size() and +// work_group_size_hint() so that they can be used to generate +// different kernels +static KernelAttributes generate_reqd_work_group_size_data( + const std::vector& work_group_dimensions) +{ + KernelAttributes reqd_work_group_size_data; + for (auto dimension : work_group_dimensions) + { + reqd_work_group_size_data.push_back( + "reqd_work_group_size(" + std::to_string(dimension.x) + "," + + std::to_string(dimension.y) + "," + std::to_string(dimension.z) + + ")"); + } + return reqd_work_group_size_data; +} + +static KernelAttributes generate_work_group_size_data( + const std::vector& work_group_dimensions) +{ + KernelAttributes work_group_size_hint_data; + for (auto dimension : work_group_dimensions) + { + work_group_size_hint_data.push_back( + "work_group_size_hint(" + std::to_string(dimension.x) + "," + + std::to_string(dimension.y) + "," + std::to_string(dimension.z) + + ")"); + } + return work_group_size_hint_data; +} + +// Populate the Global Vectors which store individual Kernel Attributes +static void populate_single_attribute_tests( + // Vectors to store the different data that fill the attributes + const KernelAttributes& vec_type_hint_data, + const KernelAttributes& work_group_size_hint_data, + const KernelAttributes& reqd_work_group_size_data) +{ + for (auto vector_test : vec_type_hint_data) + { + // Initialise vec_type_hint attribute tests + vect_tests.push_back({ vector_test }); + } + for (auto work_group_test : work_group_size_hint_data) + { + + // Initialise work_group_size_hint attribute test + work_tests.push_back({ work_group_test }); + } + for (auto reqd_work_group_test : reqd_work_group_size_data) + { + + // Initialise reqd_work_group_size attribute tests + reqd_tests.push_back({ 
reqd_work_group_test }); + } +} + +// Populate the Global Vectors which store the different permutations of 2 +// Kernel Attributes +static void populate_double_attribute_tests( + const KernelAttributes& vec_type_hint_data, + const KernelAttributes& work_group_size_hint_data, + const KernelAttributes& reqd_work_group_size_data) +{ + for (auto vector_test : vec_type_hint_data) + { + for (auto work_group_test : work_group_size_hint_data) + { + // Initialise the tests for the permutation of work_group_size_hint + // combined with vec_type_hint + work_vect_tests.push_back({ work_group_test, vector_test }); + } + for (auto reqd_work_group_test : reqd_work_group_size_data) + { + // Initialise the tests for the permutation of vec_type_hint and + // reqd_work_group_size + vect_reqd_tests.push_back({ vector_test, reqd_work_group_test }); + } + } + for (auto work_group_test : work_group_size_hint_data) + { + + for (auto reqd_work_group_test : reqd_work_group_size_data) + { + // Initialse the tests for the permutation of reqd_work_group_size + // and work_group_size_hint + reqd_work_tests.push_back( + { reqd_work_group_test, work_group_test }); + } + } +} + +// Populate the Global Vectors which store the different permutations of 3 +// Kernel Attributes +static void populate_triple_attribute_tests( + const KernelAttributes& vec_type_hint_data, + const KernelAttributes& work_group_size_hint_data, + const KernelAttributes& reqd_work_group_size_data) +{ + for (auto vector_test : vec_type_hint_data) + { + for (auto work_group_test : work_group_size_hint_data) + { + for (auto reqd_work_group_test : reqd_work_group_size_data) + { + // Initialise the chosen permutations of 3 attributes + vect_work_reqd_tests.push_back( + { vector_test, work_group_test, reqd_work_group_test }); + work_reqd_vect_tests.push_back( + { work_group_test, reqd_work_group_test, vector_test }); + reqd_vect_work_tests.push_back( + { reqd_work_group_test, vector_test, work_group_test }); + } + } + } +} + 
+static const std::vector +generate_attribute_tests(const KernelAttributes& vec_type_hint_data, + const KernelAttributes& work_group_size_hint_data, + const KernelAttributes& reqd_work_group_size_data) +{ + populate_single_attribute_tests(vec_type_hint_data, + work_group_size_hint_data, + reqd_work_group_size_data); + populate_double_attribute_tests(vec_type_hint_data, + work_group_size_hint_data, + reqd_work_group_size_data); + populate_triple_attribute_tests(vec_type_hint_data, + work_group_size_hint_data, + reqd_work_group_size_data); + + // Store all of the filled vectors in a single structure + const std::vector all_tests = { + &vect_tests, &work_tests, &reqd_tests, + + &work_vect_tests, &vect_reqd_tests, &reqd_work_tests, + + &vect_work_reqd_tests, &work_reqd_vect_tests, &reqd_vect_work_tests + }; + return all_tests; +} + +static const std::vector +initialise_attribute_data(cl_device_id deviceID) +{ + // This vector stores different work group dimensions that can be used by + // the reqd_work_group_size and work_group_size_hint attributes. It + // currently only has a single value to minimise time complexity of the + // overall test but can be easily changed. 
+ static const std::vector work_group_dimensions = { + { 1, 1, 1 } + }; + KernelAttributes vec_type_hint_data = generate_vec_type_hint_data(deviceID); + KernelAttributes work_group_size_hint_data = + generate_work_group_size_data(work_group_dimensions); + KernelAttributes reqd_work_group_size_data = + generate_reqd_work_group_size_data(work_group_dimensions); + + // Generate all the permutations of attributes to create different test + // suites + return generate_attribute_tests(vec_type_hint_data, + work_group_size_hint_data, + reqd_work_group_size_data); +} + +static bool run_test(cl_context context, cl_device_id deviceID, + const AttributePermutations& permutations) +{ + bool success = true; + for (auto attribute_permutation : permutations) + { + + std::string kernel_source_string = + generate_kernel_source(attribute_permutation); + const char* kernel_src = kernel_source_string.c_str(); + clProgramWrapper program; + clKernelWrapper kernel; + cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "test_kernel"); + test_error(err, "create_single_kernel_helper"); + + // Get the size of the kernel attribute string returned + size_t size = 0; + err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size); + test_error(err, "clGetKernelInfo"); + std::vector attributes(size); + err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(), + attributes.data(), nullptr); + test_error(err, "clGetKernelInfo"); + std::string attribute_string(attributes.data()); + attribute_string.erase( + std::remove(attribute_string.begin(), attribute_string.end(), ' '), + attribute_string.end()); + if (gCompilationMode != kOnline) + { + if (!attribute_string.empty()) + { + success = false; + log_error("Error: Expected an empty string\n"); + log_error("Attribute string reported as: %s\n", + attribute_string.c_str()); + } + } + else + { + bool permutation_success = true; + for (auto attribute : attribute_permutation) + { + if 
(attribute_string.find(attribute) == std::string::npos) + { + success = false; + permutation_success = false; + log_error("ERROR: did not find expected attribute: '%s'\n", + attribute.c_str()); + } + } + if (!permutation_success) + { + log_error("Attribute string reported as: %s\n", + attribute_string.c_str()); + } + } + } + return success; +} + +int test_kernel_attributes(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool success = true; + + // Vector to store all of the tests + const std::vector all_tests = + initialise_attribute_data(deviceID); + + for (auto permutations : all_tests) + { + success = success && run_test(context, deviceID, *permutations); + } + return success ? TEST_PASS : TEST_FAIL; +} diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index ccfeaafa13..2afe043768 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -348,14 +348,7 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_ TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * ) TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t ) - - clReleaseMemObject( subBufferObject ); - subBufferObject = NULL; - } - - clReleaseMemObject( bufferObject ); - bufferObject = NULL; } return CL_SUCCESS; diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp index c29613f9fe..f1a4e99339 100644 --- a/test_conformance/api/test_mem_objects.cpp +++ b/test_conformance/api/test_mem_objects.cpp @@ -48,12 +48,7 @@ int test_mem_object_destructor_callback_single(clMemWrapper &memObject) test_error(error, "Unable to set destructor callback"); // Now release the buffer, which SHOULD call the callbacks - error = clReleaseMemObject(memObject); 
- test_error(error, "Unable to release test buffer"); - - // Note: since we manually released the mem wrapper, we need to set it to - // NULL to prevent a double-release - memObject = NULL; + memObject.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp index ba43f183ea..d412d4eac6 100644 --- a/test_conformance/api/test_null_buffer_arg.cpp +++ b/test_conformance/api/test_null_buffer_arg.cpp @@ -157,14 +157,13 @@ int test_null_buffer_arg(cl_device_id device, cl_context context, // prep kernel: if (gIsEmbedded) - status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string, NULL); + status = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_string, "test_kernel"); else - status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string_long, NULL); + status = create_single_kernel_helper( + context, &program, &kernel, 1, &kernel_string_long, "test_kernel"); - test_error(status, "Unable to build test program"); - - kernel = clCreateKernel(program, "test_kernel", &status); - test_error(status, "CreateKernel failed."); + test_error(status, "Unable to create kernel"); cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float), NULL, NULL); diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index ed01e89497..30b5706f6b 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,6 +19,7 @@ #include #include #include +#include int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { @@ -131,86 +132,91 @@ int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command return 0; } -int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +template +int sampler_param_test(cl_sampler sampler, cl_sampler_info param_name, + T expected, const char *name) { - int error; size_t size; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) - - cl_sampler_properties properties[] = { - CL_SAMPLER_NORMALIZED_COORDS, CL_TRUE, - CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP, - CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, - 0 }; - clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error); - test_error( error, "Unable to create sampler to test with" ); - - cl_uint refCount; - error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); - test_error( error, "Unable to get sampler ref count" ); - if( size != sizeof( refCount ) ) + T val; + int error = clGetSamplerInfo(sampler, param_name, sizeof(val), &val, &size); + test_error(error, "Unable to get sampler info"); + if (val != expected) { - log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); - return -1; - } - - cl_context otherCtx; - error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size ); - test_error( error, "Unable to get sampler context" ); - if( otherCtx != context ) - { - log_error( "ERROR: Sampler context does not validate! 
(expected %p, got %p)\n", context, otherCtx ); - return -1; + test_fail("ERROR: Sampler %s did not validate!\n", name); } - if( size != sizeof( otherCtx ) ) + if (size != sizeof(val)) { - log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size ); - return -1; + test_fail("ERROR: Returned size of sampler %s does not validate! " + "(expected %d, got %d)\n", + name, (int)sizeof(val), (int)size); } + return 0; +} - cl_addressing_mode mode; - error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size ); - test_error( error, "Unable to get sampler addressing mode" ); - if( mode != CL_ADDRESS_CLAMP ) - { - log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode ); - return -1; - } - if( size != sizeof( mode ) ) - { - log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size ); - return -1; - } +static cl_int normalized_coord_values[] = { CL_TRUE, CL_FALSE }; +static cl_addressing_mode addressing_mode_values[] = { + CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, + CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT +}; +static cl_filter_mode filter_mode_values[] = { CL_FILTER_NEAREST, + CL_FILTER_LINEAR }; - cl_filter_mode fmode; - error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size ); - test_error( error, "Unable to get sampler filter mode" ); - if( fmode != CL_FILTER_LINEAR ) - { - log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode ); - return -1; - } - if( size != sizeof( fmode ) ) - { - log_error( "ERROR: Returned size of sampler filter mode does not validate! 
(expected %d, got %d)\n", (int)sizeof( fmode ), (int)size ); - return -1; - } +int test_sampler_params(cl_device_id deviceID, cl_context context, + bool is_compatibility, int norm_coord_num, + int addr_mod_num, int filt_mod_num) +{ + cl_uint refCount; + size_t size; + int error; - cl_int norm; - error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size ); - test_error( error, "Unable to get sampler normalized flag" ); - if( norm != CL_TRUE ) + clSamplerWrapper sampler; + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, + normalized_coord_values[norm_coord_num], + CL_SAMPLER_ADDRESSING_MODE, + addressing_mode_values[addr_mod_num], + CL_SAMPLER_FILTER_MODE, + filter_mode_values[filt_mod_num], + 0 + }; + + if (is_compatibility) { - log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm ); - return -1; + sampler = + clCreateSampler(context, normalized_coord_values[norm_coord_num], + addressing_mode_values[addr_mod_num], + filter_mode_values[filt_mod_num], &error); + test_error(error, "Unable to create sampler to test with"); } - if( size != sizeof( norm ) ) + else { - log_error( "ERROR: Returned size of sampler normalized flag does not validate! 
(expected %d, got %d)\n", (int)sizeof( norm ), (int)size ); - return -1; + sampler = clCreateSamplerWithProperties(context, properties, &error); + test_error(error, "Unable to create sampler to test with"); } + error = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, + sizeof(refCount), &refCount, &size); + test_error(error, "Unable to get sampler ref count"); + test_assert_error(size == sizeof(refCount), + "Returned size of sampler refcount does not validate!\n"); + + error = sampler_param_test(sampler, CL_SAMPLER_CONTEXT, context, "context"); + test_error(error, "param checking failed"); + + error = sampler_param_test(sampler, CL_SAMPLER_ADDRESSING_MODE, + addressing_mode_values[addr_mod_num], + "addressing mode"); + test_error(error, "param checking failed"); + + error = sampler_param_test(sampler, CL_SAMPLER_FILTER_MODE, + filter_mode_values[filt_mod_num], "filter mode"); + test_error(error, "param checking failed"); + + error = sampler_param_test(sampler, CL_SAMPLER_NORMALIZED_COORDS, + normalized_coord_values[norm_coord_num], + "normalized coords"); + test_error(error, "param checking failed"); + Version version = get_device_cl_version(deviceID); if (version >= Version(3, 0)) { @@ -226,97 +232,251 @@ int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_ error, "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES size."); - if (set_size != test_properties.size() * sizeof(cl_sampler_properties)) + if (is_compatibility) { - log_error("ERROR: CL_SAMPLER_PROPERTIES size is %d, expected %d.\n", - set_size, - test_properties.size() * sizeof(cl_sampler_properties)); - return TEST_FAIL; + if (set_size != 0) + { + log_error( + "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected 0\n", + set_size); + return TEST_FAIL; + } + } + else + { + if (set_size + != test_properties.size() * sizeof(cl_sampler_properties)) + { + log_error( + "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected %d.\n", + set_size, + test_properties.size() * 
sizeof(cl_sampler_properties)); + return TEST_FAIL; + } + + cl_uint number_of_props = set_size / sizeof(cl_sampler_properties); + check_properties.resize(number_of_props); + error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, set_size, + check_properties.data(), 0); + test_error( + error, + "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES."); + + error = compareProperties(check_properties, test_properties); + test_error(error, "checkProperties mismatch."); } - - cl_uint number_of_props = set_size / sizeof(cl_sampler_properties); - check_properties.resize(number_of_props); - error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, set_size, - check_properties.data(), 0); - test_error(error, - "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES."); - - error = compareProperties(check_properties, test_properties); - test_error(error, "checkProperties mismatch."); } + return 0; +} +int get_sampler_info_params(cl_device_id deviceID, cl_context context, + bool is_compatibility) +{ + for (int norm_coord_num = 0; + norm_coord_num < ARRAY_SIZE(normalized_coord_values); norm_coord_num++) + { + for (int addr_mod_num = 0; + addr_mod_num < ARRAY_SIZE(addressing_mode_values); addr_mod_num++) + { + if ((normalized_coord_values[norm_coord_num] == CL_FALSE) + && ((addressing_mode_values[addr_mod_num] == CL_ADDRESS_REPEAT) + || (addressing_mode_values[addr_mod_num] + == CL_ADDRESS_MIRRORED_REPEAT))) + { + continue; + } + for (int filt_mod_num = 0; + filt_mod_num < ARRAY_SIZE(filter_mode_values); filt_mod_num++) + { + int err = test_sampler_params(deviceID, context, + is_compatibility, norm_coord_num, + addr_mod_num, filt_mod_num); + test_error(err, "testing clGetSamplerInfo params failed"); + } + } + } return 0; } +int test_get_sampler_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int error; + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) -#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, 
cast ) \ -error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \ -test_error( error, "Unable to get command queue " name ); \ -if( val != expected ) \ -{ \ -log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \ -return -1; \ -} \ -if( size != sizeof( val ) ) \ -{ \ -log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ -return -1; \ + error = get_sampler_info_params(deviceID, context, false); + test_error(error, "Test Failed"); + + return 0; } -int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +int test_get_sampler_info_compatibility(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) + + error = get_sampler_info_params(deviceID, context, true); + test_error(error, "Test Failed"); + + return 0; +} + +template +int command_queue_param_test(cl_command_queue queue, + cl_command_queue_info param_name, T expected, + const char *name) +{ size_t size; + T val; + int error = + clGetCommandQueueInfo(queue, param_name, sizeof(val), &val, &size); + test_error(error, "Unable to get command queue info"); + if (val != expected) + { + test_fail("ERROR: Command queue %s did not validate!\n", name); + } + if (size != sizeof(val)) + { + test_fail("ERROR: Returned size of command queue %s does not validate! 
" + "(expected %d, got %d)\n", + name, (int)sizeof(val), (int)size); + } + return 0; +} - cl_queue_properties device_props; - cl_queue_properties queue_props[] = {CL_QUEUE_PROPERTIES,0,0}; +int check_get_command_queue_info_params(cl_device_id deviceID, + cl_context context, + bool is_compatibility) +{ + const cl_command_queue_properties host_optional[] = { + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const cl_command_queue_properties device_required[] = { + CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const size_t host_optional_size = ARRAY_SIZE(host_optional); + const size_t device_required_size = ARRAY_SIZE(device_required); - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(device_props), &device_props, NULL); - log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", (int)device_props); + Version version = get_device_cl_version(deviceID); - // Mask off vendor extension properties. Only test standard OpenCL properties - device_props &= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE; + const cl_device_info host_queue_query = version >= Version(2, 0) + ? 
CL_DEVICE_QUEUE_ON_HOST_PROPERTIES + : CL_DEVICE_QUEUE_PROPERTIES; - queue_props[1] = device_props; - clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &queue_props[0], &error ); - test_error( error, "Unable to create command queue to test with" ); + cl_queue_properties host_queue_props = 0; + int error = + clGetDeviceInfo(deviceID, host_queue_query, sizeof(host_queue_props), + &host_queue_props, NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", host_queue_props); - cl_uint refCount; - error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); - test_error( error, "Unable to get command queue reference count" ); - if( size != sizeof( refCount ) ) + cl_queue_properties device_queue_props = 0; + if (version >= Version(2, 0)) { - log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); - return -1; + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + sizeof(device_queue_props), &device_queue_props, + NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES is %d\n", + device_queue_props); } - cl_context otherCtx; - TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + bool out_of_order_supported = + host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - cl_device_id otherDevice; - error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size); - test_error(error, "clGetCommandQueue failed."); + bool on_device_supported = + (version >= Version(2, 0) && version < Version(3, 0)) + || (version >= Version(3, 0) && device_queue_props != 0); - if (size != sizeof(cl_device_id)) { - log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! 
(expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size ); - return -1; - } + // test device queues if the device and the API under test support it + bool test_on_device = on_device_supported && !is_compatibility; - /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */ - cl_uint otherDevice_vid, deviceID_vid; - error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL ); - test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); - error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL ); - test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + std::vector queue_props{ 0, + CL_QUEUE_PROFILING_ENABLE }; - if( otherDevice_vid != deviceID_vid ) + if (out_of_order_supported) { - log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid ); - return -1; + queue_props.insert(queue_props.end(), &host_optional[0], + &host_optional[host_optional_size]); + }; + + cl_queue_properties queue_props_arg[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + + if (test_on_device) + { + queue_props.insert(queue_props.end(), &device_required[0], + &device_required[device_required_size]); + }; + + for (cl_queue_properties props : queue_props) + { + + queue_props_arg[1] = props; + + clCommandQueueWrapper queue; + if (is_compatibility) + { + queue = clCreateCommandQueue(context, deviceID, props, &error); + test_error(error, "Unable to create command queue to test with"); + } + else + { + queue = clCreateCommandQueueWithProperties(context, deviceID, + queue_props_arg, &error); + test_error(error, "Unable to create command queue to test with"); + } + + cl_uint refCount; + size_t size; + error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, + sizeof(refCount), &refCount, &size); + test_error(error, "Unable to get command queue reference count"); + 
test_assert_error(size == sizeof(refCount), + "Returned size of command queue reference count does " + "not validate!\n"); + + error = command_queue_param_test(queue, CL_QUEUE_CONTEXT, context, + "context"); + test_error(error, "param checking failed"); + + error = command_queue_param_test(queue, CL_QUEUE_DEVICE, deviceID, + "deviceID"); + test_error(error, "param checking failed"); + + error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES, + queue_props_arg[1], "properties"); + test_error(error, "param checking failed"); } + return 0; +} - cl_command_queue_properties props; - TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int ) +int test_get_command_queue_info(cl_device_id deviceID, cl_context context, + cl_command_queue ignoreQueue, int num_elements) +{ + int error = check_get_command_queue_info_params(deviceID, context, false); + test_error(error, "Test Failed"); + return 0; +} +int test_get_command_queue_info_compatibility(cl_device_id deviceID, + cl_context context, + cl_command_queue ignoreQueue, + int num_elements) +{ + int error = check_get_command_queue_info_params(deviceID, context, true); + test_error(error, "Test Failed"); return 0; } @@ -679,5 +839,3 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c return 0; } - - diff --git a/test_conformance/api/test_queries_compatibility.cpp b/test_conformance/api/test_queries_compatibility.cpp deleted file mode 100644 index c53fba894d..0000000000 --- a/test_conformance/api/test_queries_compatibility.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "testBase.h" -#include "harness/imageHelpers.h" -#include -#include - -int test_get_sampler_info_compatibility(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - size_t size; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) - - clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error ); - test_error( error, "Unable to create sampler to test with" ); - - cl_uint refCount; - error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); - test_error( error, "Unable to get sampler ref count" ); - if( size != sizeof( refCount ) ) - { - log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); - return -1; - } - - cl_context otherCtx; - error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size ); - test_error( error, "Unable to get sampler context" ); - if( otherCtx != context ) - { - log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx ); - return -1; - } - if( size != sizeof( otherCtx ) ) - { - log_error( "ERROR: Returned size of sampler context does not validate! 
(expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size ); - return -1; - } - - cl_addressing_mode mode; - error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size ); - test_error( error, "Unable to get sampler addressing mode" ); - if( mode != CL_ADDRESS_CLAMP ) - { - log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode ); - return -1; - } - if( size != sizeof( mode ) ) - { - log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size ); - return -1; - } - - cl_filter_mode fmode; - error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size ); - test_error( error, "Unable to get sampler filter mode" ); - if( fmode != CL_FILTER_LINEAR ) - { - log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode ); - return -1; - } - if( size != sizeof( fmode ) ) - { - log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size ); - return -1; - } - - cl_int norm; - error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size ); - test_error( error, "Unable to get sampler normalized flag" ); - if( norm != CL_TRUE ) - { - log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm ); - return -1; - } - if( size != sizeof( norm ) ) - { - log_error( "ERROR: Returned size of sampler normalized flag does not validate! 
(expected %d, got %d)\n", (int)sizeof( norm ), (int)size ); - return -1; - } - - return 0; -} - -#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \ -error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \ -test_error( error, "Unable to get command queue " name ); \ -if( val != expected ) \ -{ \ -log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \ -return -1; \ -} \ -if( size != sizeof( val ) ) \ -{ \ -log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ -return -1; \ -} - -int test_get_command_queue_info_compatibility(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) -{ - int error; - size_t size; - - cl_command_queue_properties device_props; - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL); - log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props); - - // Mask off vendor extension properties. Only test standard OpenCL - // properties - device_props &= - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE; - - clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error ); - test_error( error, "Unable to create command queue to test with" ); - - cl_uint refCount; - error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); - test_error( error, "Unable to get command queue reference count" ); - if( size != sizeof( refCount ) ) - { - log_error( "ERROR: Returned size of command queue reference count does not validate! 
(expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); - return -1; - } - - cl_context otherCtx; - TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context ) - - cl_device_id otherDevice; - error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size); - test_error(error, "clGetCommandQueue failed."); - - if (size != sizeof(cl_device_id)) { - log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size ); - return -1; - } - - /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */ - cl_uint otherDevice_vid, deviceID_vid; - error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL ); - test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); - error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL ); - test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); - - if( otherDevice_vid != deviceID_vid ) - { - log_error( "ERROR: Incorrect device returned for queue! 
(Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid ); - return -1; - } - - cl_command_queue_properties props; - TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int ) - - return 0; -} - diff --git a/test_conformance/api/test_retain.cpp b/test_conformance/api/test_retain.cpp index cf065bcdb7..6e66c7dabc 100644 --- a/test_conformance/api/test_retain.cpp +++ b/test_conformance/api/test_retain.cpp @@ -251,11 +251,9 @@ int test_retain_mem_object_set_kernel_arg(cl_device_id deviceID, cl_context cont err = clSetMemObjectDestructorCallback( buffer, callback, nullptr ); test_error( err, "Unable to set destructor callback" ); - err = create_single_kernel_helper( context, &program, nullptr, 1, testProgram, nullptr ); - test_error( err, "Unable to build sample program" ); - - kernel = clCreateKernel( program, "sample_test", &err ); - test_error( err, "Unable to create sample_test kernel" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + testProgram, "sample_test"); + test_error(err, "Unable to build sample program and sample_test kernel"); err = clSetKernelArg( kernel, 0, sizeof(cl_mem), &buffer ); test_error( err, "Unable to set kernel argument" ); diff --git a/test_conformance/api/test_retain_program.cpp b/test_conformance/api/test_retain_program.cpp index aa9c8b36e5..b9fc8b7e24 100644 --- a/test_conformance/api/test_retain_program.cpp +++ b/test_conformance/api/test_retain_program.cpp @@ -28,14 +28,11 @@ int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_comm int error; const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" }; - /* Create a test program */ - error = create_single_kernel_helper(context, &program, NULL, 1, testProgram, NULL); + /* Create a test program and kernel from it */ + error = create_single_kernel_helper(context, &program, &kernel, 1, + testProgram, "sample_test"); test_error( error, "Unable 
to build sample program to test with" ); - /* And create a kernel from it */ - kernel = clCreateKernel( program, "sample_test", &error ); - test_error( error, "Unable to create kernel" ); - /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */ clReleaseProgram( program ); clReleaseKernel( kernel ); diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp index 387d6c3906..01d0ffa380 100644 --- a/test_conformance/api/test_sub_group_dispatch.cpp +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -95,7 +95,9 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman } } - error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, + subgroup_dispatch_kernel, + "subgroup_dispatch_kernel"); if (error != 0) return error; diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 5f4c0943b9..c0c0136319 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -200,6 +200,10 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + // Limit workSize to avoid extremely large local buffer size and slow + // run. 
+ if (workSize > 65536) workSize = 65536; + // "workSize" is limited to that of the first dimension as only a 1DRange is executed. if( maxSizes[0] < workSize ) { @@ -1004,8 +1008,7 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_ // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; cl_long bits = (cl_long)0xffffffffffffffffLL; - for( size_t i = 0; i < numBits; i++ ) - bits &= ~( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits &= ~(1LL << i); return bits; } @@ -1086,18 +1089,16 @@ int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue q #pragma mark ---- xor const char atom_xor_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int bitIndex = tid & ( numBits - 1 );\n" -"\n" -" oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int bitIndex = tid & ( numBits - 1 );\n" + "\n" + " oldValues[tid] = atom_xor( &destMemory[0], 1L << bitIndex );\n"; const char atomic_xor_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int bitIndex = tid & ( numBits - 1 );\n" -"\n" -" oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int bitIndex = tid & ( numBits - 1 );\n" + "\n" + " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n"; cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) { diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 27178246bb..c5c4b5f0cc 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -37,6 +37,9 @@ set(${MODULE_NAME}_SOURCES test_work_item_functions.cpp test_astype.cpp test_async_copy.cpp + test_async_copy2D.cpp + test_async_copy3D.cpp + test_async_copy_fence.cpp test_sizeof.cpp test_vector_creation.cpp 
test_vector_swizzle.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index d1a35faec7..86c3cec359 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -113,14 +113,24 @@ test_definition test_list[] = { ADD_TEST(async_copy_local_to_global), ADD_TEST(async_strided_copy_global_to_local), ADD_TEST(async_strided_copy_local_to_global), + ADD_TEST(async_copy_global_to_local2D), + ADD_TEST(async_copy_local_to_global2D), + ADD_TEST(async_copy_global_to_local3D), + ADD_TEST(async_copy_local_to_global3D), + ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_local), + ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_global), + ADD_TEST( + async_work_group_copy_fence_import_after_export_aliased_global_and_local), + ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_local), + ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_global), + ADD_TEST( + async_work_group_copy_fence_export_after_import_aliased_global_and_local), ADD_TEST(prefetch), - ADD_TEST(kernel_call_kernel_function), ADD_TEST(host_numeric_constants), ADD_TEST(kernel_numeric_constants), ADD_TEST(kernel_limit_constants), ADD_TEST(kernel_preprocessor_macros), - ADD_TEST(parameter_types), ADD_TEST(vector_creation), ADD_TEST(vector_swizzle), diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index bdb7d6a430..4a01a8cbb0 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -115,6 +115,42 @@ extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_contex extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, 
int num_elements); +extern int test_async_copy_global_to_local2D(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_async_copy_local_to_global2D(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_async_copy_global_to_local3D(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_async_copy_local_to_global3D(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_async_work_group_copy_fence_import_after_export_aliased_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_async_work_group_copy_fence_import_after_export_aliased_global( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int +test_async_work_group_copy_fence_import_after_export_aliased_global_and_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_async_work_group_copy_fence_export_after_import_aliased_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_async_work_group_copy_fence_export_after_import_aliased_global( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int +test_async_work_group_copy_fence_export_after_import_aliased_global_and_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp new file mode 100644 index 
0000000000..fafcac837a --- /dev/null +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -0,0 +1,449 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include + +#include "../../test_common/harness/conversions.h" +#include "procs.h" + +static const char *async_global_to_local_kernel2D = + "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" + "%s\n" // optional pragma string + "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " + "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int " + "lineCopiesPerWorkItem, int srcStride, int dstStride )\n" + "{\n" + " int i, j;\n" + // Zero the local storage first + " for(i=0; i max_global_mem_size / 2) + max_alloc_size = max_global_mem_size / 2; + + unsigned int num_of_compute_devices; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(num_of_compute_devices), + &num_of_compute_devices, NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); + + char programSource[4096]; + programSource[0] = 0; + char *programPtr; + + sprintf(programSource, kernelCode, + vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + : "", + vecNameString, vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); + // log_info("program: %s\n", programSource); + programPtr = programSource; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "test_fn"); + test_error(error, "Unable to create testing kernel"); + + size_t max_workgroup_size; + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), + &max_workgroup_size, NULL); + test_error( + error, + "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE."); + + size_t max_local_workgroup_size[3]; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(max_local_workgroup_size), + max_local_workgroup_size, NULL); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Pick the minimum of the device and the kernel + if (max_workgroup_size > max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t numElementsPerLine = 10; + size_t lineCopiesPerWorkItem = 13; + elementSize = + get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); + size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize + * (numElementsPerLine + (localIsDst ? 
dstStride : srcStride)); + size_t maxLocalWorkgroupSize = + (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); + + // Calculation can return 0 on embedded devices due to 1KB local mem limit + if (maxLocalWorkgroupSize == 0) + { + maxLocalWorkgroupSize = 1; + } + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride) + / (numElementsPerLine + srcStride); + size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride) + / (numElementsPerLine + dstStride); + size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); + size_t maxLocalWorkgroups = + maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem); + + size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? dstStride : srcStride); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); + size_t totalLines = + numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem; + size_t inBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride); + size_t outBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride); + size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + + inBuffer = (void *)malloc(inBufferSize); + outBuffer = (void *)malloc(outBufferSize); + outBufferCopy = (void *)malloc(outBufferSize); + + cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt, + lineCopiesPerWorkgroup; + lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem; + numElementsPerLineInt = (int)numElementsPerLine; + lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize); + + log_info( + "Global: %d, local %d, local buffer %db, global in buffer %db, " + "global out buffer %db, each work group will copy %d lines and each " + "work item item will copy %d lines.\n", + 
(int)globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, + (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup, + lineCopiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand(gRandomSeed); + generate_random_data( + vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); + generate_random_data( + vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + free_mtdata(d); + d = NULL; + memcpy(outBufferCopy, outBuffer, outBufferSize); + + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, + inBuffer, &error); + test_error(error, "Unable to create input buffer"); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize, + outBuffer, &error); + test_error(error, "Unable to create output buffer"); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 2, localBufferSize, NULL); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt), + &numElementsPerLineInt); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 4, sizeof(lineCopiesPerWorkgroup), + &lineCopiesPerWorkgroup); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 5, sizeof(lineCopiesPerWorkItemInt), + &lineCopiesPerWorkItemInt); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 6, sizeof(srcStride), &srcStride); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 7, sizeof(dstStride), &dstStride); + test_error(error, "Unable to set kernel argument"); + + // Enqueue + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, 
threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to queue kernel"); + + // Read + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize, + outBuffer, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + // Verify + int failuresPrinted = 0; + // Verify + size_t typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; + i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize; + i += elementSize) + { + for (int j = 0; j < (int)numElementsPerLine * elementSize; + j += elementSize) + { + int inIdx = i * (numElementsPerLine + srcStride) + j; + int outIdx = i * (numElementsPerLine + dstStride) + j; + if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, + typeSize) + != 0) + { + unsigned char *inchar = (unsigned char *)inBuffer + inIdx; + unsigned char *outchar = (unsigned char *)outBuffer + outIdx; + char values[4096]; + values[0] = 0; + + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of copy did not validate!\n"); + } + sprintf(values + strlen(values), "%d -> [", inIdx); + for (int k = 0; k < (int)elementSize; k++) + sprintf(values + strlen(values), "%2x ", inchar[k]); + sprintf(values + strlen(values), "] != ["); + for (int k = 0; k < (int)elementSize; k++) + sprintf(values + strlen(values), "%2x ", outchar[k]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + return -1; + } + } + if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1) + * elementSize) + { + int outIdx = i * (numElementsPerLine + dstStride) + + numElementsPerLine * elementSize; + if (memcmp(((char *)outBuffer) + outIdx, + ((char *)outBufferCopy) + outIdx, + dstStride * elementSize) + != 0) + { + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of copy did not validate!\n"); + } + 
log_error( + "2D copy corrupted data in output buffer in the stride " + "offset of line %d\n", + i); + failuresPrinted++; + } + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + return -1; + } + } + } + + free(inBuffer); + free(outBuffer); + free(outBufferCopy); + + return failuresPrinted ? -1 : 0; +} + +int test_copy2D_all_types(cl_device_id deviceID, cl_context context, + cl_command_queue queue, const char *kernelCode, + bool localIsDst) +{ + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, + kULong, kFloat, kDouble, kNumExplicitTypes + }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcStride, dstStride; + + int errors = 0; + + if (!is_extension_available(deviceID, "cl_khr_extended_async_copies")) + { + log_info( + "Device does not support extended async copies. Skipping test.\n"); + return 0; + } + + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + { + if (vecType[typeIndex] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + continue; + + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) + && !gHasLong) + continue; + + for (size = 0; vecSizes[size] != 0; size++) + { + if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] + <= 2) // small type + { + for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + srcStride++) + { + for (dstStride = 0; + dstStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + dstStride++) + { + if (test_copy2D(deviceID, context, queue, kernelCode, + vecType[typeIndex], vecSizes[size], + smallTypesStrideSizes[srcStride], + smallTypesStrideSizes[dstStride], + localIsDst)) + { + errors++; + } + } + } + } + // not a small type, check only zero stride + else if (test_copy2D(deviceID, context, queue, kernelCode, + vecType[typeIndex], 
vecSizes[size], 0, 0, + localIsDst)) + { + errors++; + } + } + } + if (errors) return -1; + return 0; +} + +int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_copy2D_all_types(deviceID, context, queue, + async_global_to_local_kernel2D, true); +} + +int test_async_copy_local_to_global2D(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_copy2D_all_types(deviceID, context, queue, + async_local_to_global_kernel2D, false); +} diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp new file mode 100644 index 0000000000..2b184ee596 --- /dev/null +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -0,0 +1,546 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include + +#include "../../test_common/harness/conversions.h" +#include "procs.h" + +static const char *async_global_to_local_kernel3D = + "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" + "%s\n" // optional pragma string + "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " + "%s *localBuffer, int numElementsPerLine, int numLines, int " + "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, " + "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n" + "{\n" + " int i, j, k;\n" + // Zero the local storage first + " for(i=0; i max_global_mem_size / 2) + max_alloc_size = max_global_mem_size / 2; + + unsigned int num_of_compute_devices; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(num_of_compute_devices), + &num_of_compute_devices, NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); + + char programSource[4096]; + programSource[0] = 0; + char *programPtr; + + sprintf(programSource, kernelCode, + vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + : "", + vecNameString, vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); + // log_info("program: %s\n", programSource); + programPtr = programSource; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "test_fn"); + test_error(error, "Unable to create testing kernel"); + + size_t max_workgroup_size; + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), + &max_workgroup_size, NULL); + test_error( + error, + "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE."); + + size_t max_local_workgroup_size[3]; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(max_local_workgroup_size), + max_local_workgroup_size, NULL); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Pick the minimum of the device and the kernel + if (max_workgroup_size > max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t numElementsPerLine = 10; + size_t numLines = 13; + size_t planesCopiesPerWorkItem = 2; + elementSize = + get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); + size_t localStorageSpacePerWorkitem = elementSize + * (planesCopiesPerWorkItem + * (numLines * numElementsPerLine + + numLines * (localIsDst ? dstLineStride : srcLineStride) + + (localIsDst ? 
dstPlaneStride : srcPlaneStride))); + size_t maxLocalWorkgroupSize = + (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); + + // Calculation can return 0 on embedded devices due to 1KB local mem limit + if (maxLocalWorkgroupSize == 0) + { + maxLocalWorkgroupSize = 1; + } + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride) + / ((numLines * numElementsPerLine + numLines * srcLineStride) + + srcPlaneStride); + size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride) + / ((numLines * numElementsPerLine + numLines * dstLineStride) + + dstPlaneStride); + size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); + size_t maxLocalWorkgroups = + maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem); + + size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? 
dstPlaneStride : srcPlaneStride); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); + size_t totalPlanes = + numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem; + size_t inBufferSize = elementSize + * (totalPlanes + * (numLines * numElementsPerLine + numLines * srcLineStride) + + (totalPlanes - 1) * srcPlaneStride); + size_t outBufferSize = elementSize + * (totalPlanes + * (numLines * numElementsPerLine + numLines * dstLineStride) + + (totalPlanes - 1) * dstPlaneStride); + size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + + inBuffer = (void *)malloc(inBufferSize); + outBuffer = (void *)malloc(outBufferSize); + outBufferCopy = (void *)malloc(outBufferSize); + + cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt, + planesCopiesPerWorkgroup; + planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem; + numElementsPerLineInt = (int)numElementsPerLine; + numLinesInt = (int)numLines; + planesCopiesPerWorkgroup = + (int)(planesCopiesPerWorkItem * localWorkgroupSize); + + log_info("Global: %d, local %d, local buffer %db, global in buffer %db, " + "global out buffer %db, each work group will copy %d planes and " + "each work item item will copy %d planes.\n", + (int)globalWorkgroupSize, (int)localWorkgroupSize, + (int)localBufferSize, (int)inBufferSize, (int)outBufferSize, + planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand(gRandomSeed); + generate_random_data( + vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); + generate_random_data( + vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + free_mtdata(d); + d = NULL; + memcpy(outBufferCopy, outBuffer, outBufferSize); + + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, + inBuffer, &error); + test_error(error, "Unable to create input buffer"); + streams[1] = 
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize, + outBuffer, &error); + test_error(error, "Unable to create output buffer"); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 2, localBufferSize, NULL); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt), + &numElementsPerLineInt); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 4, sizeof(numLinesInt), &numLinesInt); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 5, sizeof(planesCopiesPerWorkgroup), + &planesCopiesPerWorkgroup); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 6, sizeof(planesCopiesPerWorkItemInt), + &planesCopiesPerWorkItemInt); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 7, sizeof(srcLineStride), &srcLineStride); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 8, sizeof(dstLineStride), &dstLineStride); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 9, sizeof(srcPlaneStride), &srcPlaneStride); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, 10, sizeof(dstPlaneStride), &dstPlaneStride); + test_error(error, "Unable to set kernel argument"); + + // Enqueue + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to queue kernel"); + + // Read + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize, + outBuffer, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + // Verify + int failuresPrinted = 0; + // Verify + size_t 
typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; + i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize; + i += elementSize) + { + for (int j = 0; j < (int)numLines * elementSize; j += elementSize) + { + for (int k = 0; k < (int)numElementsPerLine * elementSize; + k += elementSize) + { + int inIdx = i + * (numLines * numElementsPerLine + + numLines * srcLineStride + srcPlaneStride) + + j * (numElementsPerLine + srcLineStride) + k; + int outIdx = i + * (numLines * numElementsPerLine + + numLines * dstLineStride + dstPlaneStride) + + j * (numElementsPerLine + dstLineStride) + k; + if (memcmp(((char *)inBuffer) + inIdx, + ((char *)outBuffer) + outIdx, typeSize) + != 0) + { + unsigned char *inchar = (unsigned char *)inBuffer + inIdx; + unsigned char *outchar = + (unsigned char *)outBuffer + outIdx; + char values[4096]; + values[0] = 0; + + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of copy did not validate!"); + } + sprintf(values + strlen(values), "%d -> [", inIdx); + for (int l = 0; l < (int)elementSize; l++) + sprintf(values + strlen(values), "%2x ", inchar[l]); + sprintf(values + strlen(values), "] != ["); + for (int l = 0; l < (int)elementSize; l++) + sprintf(values + strlen(values), "%2x ", outchar[l]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + return -1; + } + } + if (j < (int)numLines * elementSize) + { + int outIdx = i + * (numLines * numElementsPerLine + + numLines * dstLineStride + dstPlaneStride) + + j * (numElementsPerLine + dstLineStride) + + numElementsPerLine * elementSize; + if (memcmp(((char *)outBuffer) + outIdx, + ((char *)outBufferCopy) + outIdx, + dstLineStride * elementSize) + != 0) + { + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of copy did not validate!\n"); + } + log_error( 
+ "3D copy corrupted data in output buffer in the line " + "stride offset of plane %d line %d\n", + i, j); + failuresPrinted++; + } + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + return -1; + } + } + } + if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1) + * elementSize) + { + int outIdx = i + * (numLines * numElementsPerLine + numLines * dstLineStride + + dstPlaneStride) + + (numLines * elementSize) * (numElementsPerLine) + + (numLines * elementSize) * (dstLineStride); + if (memcmp(((char *)outBuffer) + outIdx, + ((char *)outBufferCopy) + outIdx, + dstPlaneStride * elementSize) + != 0) + { + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of copy did not validate!\n"); + } + log_error("3D copy corrupted data in output buffer in the " + "plane stride " + "offset of plane %d\n", + i); + failuresPrinted++; + } + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + return -1; + } + } + } + + free(inBuffer); + free(outBuffer); + free(outBufferCopy); + + return failuresPrinted ? -1 : 0; +} + +int test_copy3D_all_types(cl_device_id deviceID, cl_context context, + cl_command_queue queue, const char *kernelCode, + bool localIsDst) +{ + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, + kULong, kFloat, kDouble, kNumExplicitTypes + }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride, + dstPlaneStride; + + int errors = 0; + + if (!is_extension_available(deviceID, "cl_khr_extended_async_copies")) + { + log_info( + "Device does not support extended async copies. 
Skipping test.\n"); + return 0; + } + + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + { + if (vecType[typeIndex] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + continue; + + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) + && !gHasLong) + continue; + + for (size = 0; vecSizes[size] != 0; size++) + { + if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] + <= 2) // small type + { + for (srcLineStride = 0; + srcLineStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + srcLineStride++) + { + for (dstLineStride = 0; + dstLineStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + dstLineStride++) + { + for (srcPlaneStride = 0; + srcPlaneStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + srcPlaneStride++) + { + for (dstPlaneStride = 0; + dstPlaneStride < sizeof(smallTypesStrideSizes) + / sizeof(smallTypesStrideSizes[0]); + dstPlaneStride++) + { + if (test_copy3D( + deviceID, context, queue, kernelCode, + vecType[typeIndex], vecSizes[size], + smallTypesStrideSizes[srcLineStride], + smallTypesStrideSizes[dstLineStride], + smallTypesStrideSizes[srcPlaneStride], + smallTypesStrideSizes[dstPlaneStride], + localIsDst)) + { + errors++; + } + } + } + } + } + } + // not a small type, check only zero stride + else if (test_copy3D(deviceID, context, queue, kernelCode, + vecType[typeIndex], vecSizes[size], 0, 0, 0, 0, + localIsDst)) + { + errors++; + } + } + } + if (errors) return -1; + return 0; +} + +int test_async_copy_global_to_local3D(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_copy3D_all_types(deviceID, context, queue, + async_global_to_local_kernel3D, true); +} + +int test_async_copy_local_to_global3D(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_copy3D_all_types(deviceID, context, queue, + 
async_local_to_global_kernel3D, false); +} diff --git a/test_conformance/basic/test_async_copy_fence.cpp b/test_conformance/basic/test_async_copy_fence.cpp new file mode 100644 index 0000000000..43245dae1c --- /dev/null +++ b/test_conformance/basic/test_async_copy_fence.cpp @@ -0,0 +1,812 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "../../test_common/harness/conversions.h" +#include "procs.h" + +static const char *import_after_export_aliased_local_kernel = + "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n" + "%s\n" // optional pragma string + "__kernel void test_fn( const __global %s *exportSrc, __global %s " + "*exportDst,\n" + " const __global %s *importSrc, __global %s " + "*importDst,\n" + " __local %s *localBuffer, /* there isn't another " + "__local %s local buffer since export src and import dst are aliased*/\n" + " int exportSrcLocalSize, int " + "exportCopiesPerWorkItem,\n" + " int importSrcLocalSize, int " + "importCopiesPerWorkItem )\n" + "{\n" + " int i;\n" + " int localImportOffset = exportSrcLocalSize - importSrcLocalSize;\n" + // Zero the local storage first + " for(i=0; i max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t transaction1NumberOfCopiesPerWorkitem = 13; + size_t 
transaction2NumberOfCopiesPerWorkitem = 2; + elementSize = + get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); + size_t localStorageSpacePerWorkitem = + transaction1NumberOfCopiesPerWorkitem * elementSize + + (aliased_local_mem + ? 0 + : transaction2NumberOfCopiesPerWorkitem * elementSize); + size_t maxLocalWorkgroupSize = + (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); + + // Calculation can return 0 on embedded devices due to 1KB local mem limit + if (maxLocalWorkgroupSize == 0) + { + maxLocalWorkgroupSize = 1; + } + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t transaction1LocalBufferSize = localWorkgroupSize * elementSize + * transaction1NumberOfCopiesPerWorkitem; + size_t transaction2LocalBufferSize = localWorkgroupSize * elementSize + * transaction2NumberOfCopiesPerWorkitem; // irrelevant if + // aliased_local_mem + size_t numberOfLocalWorkgroups = 1111; + size_t transaction1GlobalBufferSize = + numberOfLocalWorkgroups * transaction1LocalBufferSize; + size_t transaction2GlobalBufferSize = + numberOfLocalWorkgroups * transaction2LocalBufferSize; + size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + + transaction1InBuffer = (void *)malloc(transaction1GlobalBufferSize); + transaction1OutBuffer = (void *)malloc(transaction1GlobalBufferSize); + transaction2InBuffer = (void *)malloc(transaction2GlobalBufferSize); + transaction2OutBuffer = (void *)malloc(transaction2GlobalBufferSize); + memset(transaction1OutBuffer, 0, transaction1GlobalBufferSize); + memset(transaction2OutBuffer, 0, transaction2GlobalBufferSize); + + cl_int transaction1CopiesPerWorkitemInt, transaction1CopiesPerWorkgroup, + transaction2CopiesPerWorkitemInt, transaction2CopiesPerWorkgroup; + transaction1CopiesPerWorkitemInt = + (int)transaction1NumberOfCopiesPerWorkitem; + transaction1CopiesPerWorkgroup = + 
(int)(transaction1NumberOfCopiesPerWorkitem * localWorkgroupSize); + transaction2CopiesPerWorkitemInt = + (int)transaction2NumberOfCopiesPerWorkitem; + transaction2CopiesPerWorkgroup = + (int)(transaction2NumberOfCopiesPerWorkitem * localWorkgroupSize); + + log_info( + "Global: %d, local %d. 1st Transaction: local buffer %db, global " + "buffer %db, each work group will copy %d elements and each work " + "item item will copy %d elements. 2nd Transaction: local buffer " + "%db, global buffer %db, each work group will copy %d elements and " + "each work item will copy %d elements\n", + (int)globalWorkgroupSize, (int)localWorkgroupSize, + (int)transaction1LocalBufferSize, (int)transaction1GlobalBufferSize, + transaction1CopiesPerWorkgroup, transaction1CopiesPerWorkitemInt, + (int)transaction2LocalBufferSize, (int)transaction2GlobalBufferSize, + transaction2CopiesPerWorkgroup, transaction2CopiesPerWorkitemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand(gRandomSeed); + generate_random_data( + vecType, transaction1GlobalBufferSize / get_explicit_type_size(vecType), + d, transaction1InBuffer); + if (!transaction1DstIsTransaction2Src) + { + generate_random_data(vecType, + transaction2GlobalBufferSize + / get_explicit_type_size(vecType), + d, transaction2InBuffer); + } + free_mtdata(d); + d = NULL; + + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + transaction1GlobalBufferSize, + transaction1InBuffer, &error); + test_error(error, "Unable to create input buffer"); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + transaction1GlobalBufferSize, + transaction1OutBuffer, &error); + test_error(error, "Unable to create output buffer"); + if (!transaction1DstIsTransaction2Src) + { + streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + transaction2GlobalBufferSize, + transaction2InBuffer, &error); + test_error(error, "Unable to create input buffer"); + } + if 
(!transaction1SrcIsTransaction2Dst) + { + streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + transaction2GlobalBufferSize, + transaction2OutBuffer, &error); + test_error(error, "Unable to create output buffer"); + } + + cl_uint argIndex = 0; + error = clSetKernelArg(kernel, argIndex, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + error = clSetKernelArg(kernel, argIndex, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + if (!transaction1DstIsTransaction2Src) + { + error = + clSetKernelArg(kernel, argIndex, sizeof(streams[2]), &streams[2]); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + } + if (!transaction1SrcIsTransaction2Dst) + { + error = + clSetKernelArg(kernel, argIndex, sizeof(streams[3]), &streams[3]); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + } + error = clSetKernelArg(kernel, argIndex, transaction1LocalBufferSize, NULL); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + if (!aliased_local_mem) + { + error = + clSetKernelArg(kernel, argIndex, transaction2LocalBufferSize, NULL); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + } + error = + clSetKernelArg(kernel, argIndex, sizeof(transaction1CopiesPerWorkgroup), + &transaction1CopiesPerWorkgroup); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + error = clSetKernelArg(kernel, argIndex, + sizeof(transaction1CopiesPerWorkitemInt), + &transaction1CopiesPerWorkitemInt); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + error = + clSetKernelArg(kernel, argIndex, sizeof(transaction2CopiesPerWorkgroup), + &transaction2CopiesPerWorkgroup); + test_error(error, "Unable to set kernel argument"); + ++argIndex; + error = clSetKernelArg(kernel, argIndex, + sizeof(transaction2CopiesPerWorkitemInt), + &transaction2CopiesPerWorkitemInt); + test_error(error, "Unable to set 
kernel argument"); + + // Enqueue + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to queue kernel"); + + // Read + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + transaction1GlobalBufferSize, + transaction1OutBuffer, 0, NULL, NULL); + test_error(error, "Unable to read results"); + if (transaction1DstIsTransaction2Src) + { + for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++) + { + memcpy( + (void *)((unsigned char *)transaction2InBuffer + + idx * transaction2CopiesPerWorkgroup * elementSize), + (const void *)((unsigned char *)transaction1OutBuffer + + (idx * transaction1CopiesPerWorkgroup + + (transaction1CopiesPerWorkgroup + - transaction2CopiesPerWorkgroup)) + * elementSize), + (size_t)transaction2CopiesPerWorkgroup * elementSize); + } + } + if (transaction1SrcIsTransaction2Dst) + { + void *transaction1SrcBuffer = + (void *)malloc(transaction1GlobalBufferSize); + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + transaction1GlobalBufferSize, + transaction1SrcBuffer, 0, NULL, NULL); + test_error(error, "Unable to read results"); + for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++) + { + memcpy( + (void *)((unsigned char *)transaction2OutBuffer + + idx * transaction2CopiesPerWorkgroup * elementSize), + (const void *)((unsigned char *)transaction1SrcBuffer + + (idx * transaction1CopiesPerWorkgroup + + (transaction1CopiesPerWorkgroup + - transaction2CopiesPerWorkgroup)) + * elementSize), + (size_t)transaction2CopiesPerWorkgroup * elementSize); + } + free(transaction1SrcBuffer); + } + else + { + error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, + transaction2GlobalBufferSize, + transaction2OutBuffer, 0, NULL, NULL); + test_error(error, "Unable to read results"); + } + + // Verify + int failuresPrinted = 0; + if (memcmp(transaction1InBuffer, transaction1OutBuffer, + transaction1GlobalBufferSize) + != 0) + { + size_t typeSize = 
get_explicit_type_size(vecType) * vecSize; + unsigned char *inchar = (unsigned char *)transaction1InBuffer; + unsigned char *outchar = (unsigned char *)transaction1OutBuffer; + for (int i = 0; i < (int)transaction1GlobalBufferSize; + i += (int)elementSize) + { + if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize) + != 0) + { + char values[4096]; + values[0] = 0; + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of 1st transaction did not " + "validate!\n"); + } + sprintf(values + strlen(values), "%d -> [", i); + for (int j = 0; j < (int)elementSize; j++) + sprintf(values + strlen(values), "%2x ", inchar[i + j]); + sprintf(values + strlen(values), "] != ["); + for (int j = 0; j < (int)elementSize; j++) + sprintf(values + strlen(values), "%2x ", outchar[i + j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + break; + } + } + } + if (memcmp(transaction2InBuffer, transaction2OutBuffer, + transaction2GlobalBufferSize) + != 0) + { + size_t typeSize = get_explicit_type_size(vecType) * vecSize; + unsigned char *inchar = (unsigned char *)transaction2InBuffer; + unsigned char *outchar = (unsigned char *)transaction2OutBuffer; + for (int i = 0; i < (int)transaction2GlobalBufferSize; + i += (int)elementSize) + { + if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize) + != 0) + { + char values[4096]; + values[0] = 0; + if (failuresPrinted == 0) + { + // Print first failure message + log_error("ERROR: Results of 2nd transaction did not " + "validate!\n"); + } + sprintf(values + strlen(values), "%d -> [", i); + for (int j = 0; j < (int)elementSize; j++) + sprintf(values + strlen(values), "%2x ", inchar[i + j]); + sprintf(values + strlen(values), "] != ["); + for (int j = 0; j < (int)elementSize; j++) + sprintf(values + strlen(values), "%2x ", outchar[i + j]); + sprintf(values + 
strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + + if (failuresPrinted > 5) + { + log_error("Not printing further failures...\n"); + break; + } + } + } + + free(transaction1InBuffer); + free(transaction1OutBuffer); + free(transaction2InBuffer); + free(transaction2OutBuffer); + + return failuresPrinted ? -1 : 0; +} + +int test_copy_fence_all_types(cl_device_id deviceID, cl_context context, + cl_command_queue queue, const char *kernelCode, + bool export_after_import, bool aliased_local_mem, + bool aliased_global_mem) +{ + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, + kULong, kFloat, kDouble, kNumExplicitTypes + }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int size, typeIndex; + + int errors = 0; + + if (!is_extension_available(deviceID, "cl_khr_async_work_group_copy_fence")) + { + log_info( + "Device does not support extended async copies fence. Skipping " + "test.\n"); + return 0; + } + + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + { + if (vecType[typeIndex] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + continue; + + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) + && !gHasLong) + continue; + + for (size = 0; vecSizes[size] != 0; size++) + { + if (test_copy_fence(deviceID, context, queue, kernelCode, + vecType[typeIndex], vecSizes[size], + export_after_import, aliased_local_mem, + aliased_global_mem)) + { + errors++; + } + } + } + if (errors) return -1; + return 0; +} + +int test_async_work_group_copy_fence_import_after_export_aliased_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + return test_copy_fence_all_types(deviceID, context, queue, + import_after_export_aliased_local_kernel, + false, true, false); +} + +int test_async_work_group_copy_fence_import_after_export_aliased_global( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + 
int num_elements) +{ + return test_copy_fence_all_types(deviceID, context, queue, + import_after_export_aliased_global_kernel, + false, false, true); +} + +int test_async_work_group_copy_fence_import_after_export_aliased_global_and_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + return test_copy_fence_all_types( + deviceID, context, queue, + import_after_export_aliased_global_and_local_kernel, false, true, true); +} + +int test_async_work_group_copy_fence_export_after_import_aliased_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + return test_copy_fence_all_types(deviceID, context, queue, + export_after_import_aliased_local_kernel, + true, true, false); +} + +int test_async_work_group_copy_fence_export_after_import_aliased_global( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + return test_copy_fence_all_types(deviceID, context, queue, + export_after_import_aliased_global_kernel, + true, false, true); +} + +int test_async_work_group_copy_fence_export_after_import_aliased_global_and_local( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + return test_copy_fence_all_types( + deviceID, context, queue, + export_after_import_aliased_global_and_local_kernel, true, true, true); +} diff --git a/test_conformance/basic/test_basic_parameter_types.cpp b/test_conformance/basic/test_basic_parameter_types.cpp index 886da6a108..6e99d462ce 100644 --- a/test_conformance/basic/test_basic_parameter_types.cpp +++ b/test_conformance/basic/test_basic_parameter_types.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -23,279 +23,381 @@ #include "procs.h" -const char *kernel_code = -"__kernel void test_kernel(\n" -"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n" -"__global float%s *result)\n" -"{\n" -" result[0] = %s(c);\n" -" result[1] = %s(uc);\n" -" result[2] = %s(s);\n" -" result[3] = %s(us);\n" -" result[4] = %s(i);\n" -" result[5] = %s(ui);\n" -" result[6] = f;\n" -"}\n"; - -const char *kernel_code_long = -"__kernel void test_kernel_long(\n" -"long%s l, ulong%s ul,\n" -"__global float%s *result)\n" -"{\n" -" result[0] = %s(l);\n" -" result[1] = %s(ul);\n" -"}\n"; - -int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +const char *kernel_code = R"( +__kernel void test_kernel( +char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f, +__global float%s *result) { - clMemWrapper results; - int error; - size_t global[3] = {1, 1, 1}; - float results_back[2*16]; - int count, index; - const char* types[] = { "long", "ulong" }; - char kernel_string[8192]; - int sizes[] = {1, 2, 4, 8, 16}; - const char* size_strings[] = {"", "2", "4", "8", "16"}; - float expected; - int total_errors = 0; - int size_to_test; - char *ptr; - char convert_string[1024]; - size_t max_parameter_size; - - // We don't really care about the contents since we're just testing that the types work. - cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - - // Calculate how large our paramter size is to the kernel - size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong); - - // Init our strings. 
- kernel_string[0] = '\0'; - convert_string[0] = '\0'; - - // Get the maximum parameter size allowed - error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); - test_error( error, "Unable to get max parameter size from device" ); - - // Create the results buffer - results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - // Go over all the vector sizes - for (size_to_test = 0; size_to_test < 5; size_to_test++) { - clProgramWrapper program; - clKernelWrapper kernel; - - size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); - if (total_parameter_size > max_parameter_size) { - log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", - (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); - continue; - } + result[0] = %s(c); + result[1] = %s(uc); + result[2] = %s(s); + result[3] = %s(us); + result[4] = %s(i); + result[5] = %s(ui); + result[6] = f; +})"; + +const char *kernel_code_long = R"( +__kernel void test_kernel_long( +long%s l, ulong%s ul, +__global float%s *result) +{ + result[0] = %s(l); + result[1] = %s(ul); +})"; - log_info("Testing vector size %d\n", sizes[size_to_test]); +int test_parameter_types_long(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + clMemWrapper results; + int error; + size_t global[3] = { 1, 1, 1 }; + float results_back[2 * 16]; + int count, index; + const char *types[] = { "long", "ulong" }; + char kernel_string[8192]; + int sizes[] = { 1, 2, 4, 8, 16 }; + const char *size_strings[] = { "", "2", "4", "8", "16" }; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the + 
// types work. + cl_long l[16] = { -21, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15 }; + cl_ulong ul[16] = { 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong); + + // Init our strings. + kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = + clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(max_parameter_size), &max_parameter_size, NULL); + test_error(error, "Unable to get max parameter size from device"); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * 2 * 16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) + { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = + parameter_size * sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) + { + log_info( + "Can not test with vector size %d because it would exceed the " + "maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, + (int)max_parameter_size); + continue; + } - // If size is > 1, then we need a explicit convert call. 
- if (sizes[size_to_test] > 1) { - sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); - } else { - sprintf(convert_string, " "); - } + log_info("Testing vector size %d\n", sizes[size_to_test]); - // Build the kernel - sprintf(kernel_string, kernel_code_long, - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - convert_string, convert_string - ); - - ptr = kernel_string; - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long"); - test_error(error, "create single kernel failed"); - - // Set the arguments - for (count = 0; count < 2; count++) { - switch (count) { - case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break; - case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break; - default: log_error("Test error"); break; - } - if (error) - log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); - test_error(error, "clSetKernelArgs failed"); - } - error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results); - test_error(error, "clSetKernelArgs failed"); - - // Execute - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - for (count = 0; count < 2; count++) { - for (index=0; index < sizes[size_to_test]; index++) { - switch (count) { - case 0: expected = (float)l[index]; break; - case 1: expected = (float)ul[index]; break; - default: log_error("Test error"); break; + // If size is > 1, then we need a explicit convert call. 
+ if (sizes[size_to_test] > 1) + { + sprintf(convert_string, "convert_float%s", + size_strings[size_to_test]); + } + else + { + sprintf(convert_string, " "); } - if (results_back[count*sizes[size_to_test]+index] != expected) { - total_errors++; - log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], - index, results_back[count*sizes[size_to_test]+index], expected); + // Build the kernel + sprintf(kernel_string, kernel_code_long, size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + convert_string, convert_string); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&ptr, + "test_kernel_long"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 2; count++) + { + switch (count) + { + case 0: + error = clSetKernelArg( + kernel, count, sizeof(cl_long) * sizes[size_to_test], + &l); + break; + case 1: + error = clSetKernelArg( + kernel, count, sizeof(cl_ulong) * sizes[size_to_test], + &ul); + break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], + size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, + NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, + sizeof(cl_float) * 2 * 16, results_back, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 2; count++) + { + for (index = 0; index < sizes[size_to_test]; index++) + { + switch (count) + { + case 0: expected = (float)l[index]; break; + case 1: 
expected = (float)ul[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count * sizes[size_to_test] + index] + != expected) + { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, " + "expected %g.\n", + types[count], size_strings[size_to_test], index, + results_back[count * sizes[size_to_test] + index], + expected); + } + } } - } } - } - return total_errors; + return total_errors; } -int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +int test_parameter_types(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - clMemWrapper results; - int error; - size_t global[3] = {1, 1, 1}; - float results_back[7*16]; - int count, index; - const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"}; - char kernel_string[8192]; - int sizes[] = {1, 2, 4, 8, 16}; - const char* size_strings[] = {"", "2", "4", "8", "16"}; - float expected; - int total_errors = 0; - int size_to_test; - char *ptr; - char convert_string[1024]; - size_t max_parameter_size; - - // We don't really care about the contents since we're just testing that the types work. - cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - - // Calculate how large our paramter size is to the kernel - size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) + - sizeof(cl_short) +sizeof(cl_ushort) + - sizeof(cl_int) +sizeof(cl_uint) + - sizeof(cl_float); - - // Init our strings. 
- kernel_string[0] = '\0'; - convert_string[0] = '\0'; - - // Get the maximum parameter size allowed - error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); - test_error( error, "Unable to get max parameter size from device" ); - - // Create the results buffer - results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - // Go over all the vector sizes - for (size_to_test = 0; size_to_test < 5; size_to_test++) { - clProgramWrapper program; - clKernelWrapper kernel; - - size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); - if (total_parameter_size > max_parameter_size) { - log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", - (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); - continue; - } - - log_info("Testing vector size %d\n", sizes[size_to_test]); + clMemWrapper results; + int error; + size_t global[3] = { 1, 1, 1 }; + float results_back[7 * 16]; + int count, index; + const char *types[] = { "char", "uchar", "short", "ushort", + "int", "uint", "float" }; + char kernel_string[8192]; + int sizes[] = { 1, 2, 4, 8, 16 }; + const char *size_strings[] = { "", "2", "4", "8", "16" }; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the + // types work. 
+ cl_char c[16] = { 0, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15 }; + cl_uchar uc[16] = { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + cl_short s[16] = { -17, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15 }; + cl_ushort us[16] = { + 18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + }; + cl_int i[16] = { -19, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15 }; + cl_uint ui[16] = { 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + cl_float f[16] = { -23, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15 }; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) + + sizeof(cl_short) + sizeof(cl_ushort) + sizeof(cl_int) + + sizeof(cl_uint) + sizeof(cl_float); + + // Init our strings. + kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = + clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(max_parameter_size), &max_parameter_size, NULL); + test_error(error, "Unable to get max parameter size from device"); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * 7 * 16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) + { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = + parameter_size * sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) + { + log_info( + "Can not test with vector size %d because it would exceed the " + "maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, + (int)max_parameter_size); + continue; + } - // If size is > 1, then we need a explicit convert call. 
- if (sizes[size_to_test] > 1) { - sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); - } else { - sprintf(convert_string, " "); - } + log_info("Testing vector size %d\n", sizes[size_to_test]); - // Build the kernel - sprintf(kernel_string, kernel_code, - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - size_strings[size_to_test], size_strings[size_to_test], - convert_string, convert_string, convert_string, - convert_string, convert_string, convert_string - ); - - ptr = kernel_string; - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel"); - test_error(error, "create single kernel failed"); - - // Set the arguments - for (count = 0; count < 7; count++) { - switch (count) { - case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break; - case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break; - case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break; - case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break; - case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break; - case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break; - case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break; - default: log_error("Test error"); break; - } - if (error) - log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); - test_error(error, "clSetKernelArgs failed"); - } - error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results); - test_error(error, "clSetKernelArgs failed"); - - // Execute - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - test_error(error, 
"clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - for (count = 0; count < 7; count++) { - for (index=0; index < sizes[size_to_test]; index++) { - switch (count) { - case 0: expected = (float)c[index]; break; - case 1: expected = (float)uc[index]; break; - case 2: expected = (float)s[index]; break; - case 3: expected = (float)us[index]; break; - case 4: expected = (float)i[index]; break; - case 5: expected = (float)ui[index]; break; - case 6: expected = (float)f[index]; break; - default: log_error("Test error"); break; + // If size is > 1, then we need a explicit convert call. + if (sizes[size_to_test] > 1) + { + sprintf(convert_string, "convert_float%s", + size_strings[size_to_test]); + } + else + { + sprintf(convert_string, " "); } - if (results_back[count*sizes[size_to_test]+index] != expected) { - total_errors++; - log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], - index, results_back[count*sizes[size_to_test]+index], expected); + // Build the kernel + sprintf(kernel_string, kernel_code, size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], convert_string, convert_string, + convert_string, convert_string, convert_string, convert_string); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&ptr, "test_kernel"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 7; count++) + { + switch (count) + { + case 0: + error = clSetKernelArg( + kernel, count, sizeof(cl_char) * sizes[size_to_test], + &c); + break; + case 1: + 
error = clSetKernelArg( + kernel, count, sizeof(cl_uchar) * sizes[size_to_test], + &uc); + break; + case 2: + error = clSetKernelArg( + kernel, count, sizeof(cl_short) * sizes[size_to_test], + &s); + break; + case 3: + error = clSetKernelArg( + kernel, count, sizeof(cl_ushort) * sizes[size_to_test], + &us); + break; + case 4: + error = clSetKernelArg(kernel, count, + sizeof(cl_int) * sizes[size_to_test], + &i); + break; + case 5: + error = clSetKernelArg( + kernel, count, sizeof(cl_uint) * sizes[size_to_test], + &ui); + break; + case 6: + error = clSetKernelArg( + kernel, count, sizeof(cl_float) * sizes[size_to_test], + &f); + break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], + size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, + NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, + sizeof(cl_float) * 7 * 16, results_back, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 7; count++) + { + for (index = 0; index < sizes[size_to_test]; index++) + { + switch (count) + { + case 0: expected = (float)c[index]; break; + case 1: expected = (float)uc[index]; break; + case 2: expected = (float)s[index]; break; + case 3: expected = (float)us[index]; break; + case 4: expected = (float)i[index]; break; + case 5: expected = (float)ui[index]; break; + case 6: expected = (float)f[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count * sizes[size_to_test] + index] + != expected) + { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, " + "expected 
%g.\n", + types[count], size_strings[size_to_test], index, + results_back[count * sizes[size_to_test] + index], + expected); + } + } } - } } - } - if (gHasLong) { - log_info("Testing long types...\n"); - total_errors += test_parameter_types_long( device, context, queue, num_elements ); - } - else { - log_info("Longs unsupported, skipping."); - } + if (gHasLong) + { + log_info("Testing long types...\n"); + total_errors += + test_parameter_types_long(device, context, queue, num_elements); + } + else + { + log_info("Longs unsupported, skipping."); + } - return total_errors; + return total_errors; } - - - diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index f52162a815..91fe1434e9 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -14,13 +14,15 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/rounding_mode.h" #include #include #include #include #include -#include "harness/rounding_mode.h" + +#include #include "procs.h" @@ -124,8 +126,8 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL); test_error( err, "clGetDeviceInfo failed."); - localsize[0] = MIN(16, max_wgs); - localsize[1] = MIN(11, max_wgs / localsize[0]); + localsize[0] = std::min(16, max_wgs); + localsize[1] = std::min(11, max_wgs / localsize[0]); // If we need to use uniform workgroups because non-uniform workgroups are // not supported, round up to the next global size that is divisible by the // local size. 
diff --git a/test_conformance/basic/test_imagedim.cpp b/test_conformance/basic/test_imagedim.cpp index 6064655f0c..008c88b6af 100644 --- a/test_conformance/basic/test_imagedim.cpp +++ b/test_conformance/basic/test_imagedim.cpp @@ -38,24 +38,25 @@ static const char *image_dim_kernel_code = "}\n"; -static unsigned char * -generate_8888_image(int w, int h, MTdata d) +static unsigned char *generate_8888_image(size_t w, size_t h, MTdata d) { - unsigned char *ptr = (unsigned char*)malloc(w * h * 4); - int i; + unsigned char *ptr = new unsigned char[4 * w * h]; + size_t i; - for (i=0; i n) m >>= 1; - max_img_dim = (int)m; + max_img_dim = m; } if (max_img_width > max_img_dim) @@ -151,13 +152,14 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que d = init_genrand( gRandomSeed ); input_ptr = generate_8888_image(max_img_width, max_img_height, d); - output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + + output_ptr = new unsigned char[4 * max_img_width * max_img_height]; // test power of 2 width, height starting at 1 to 4K - for (i=1,i2=0; i<=max_img_height; i<<=1,i2++) + for (i = 1, i2 = 0; i <= max_img_height; i <<= 1, i2++) { img_height = (1 << i2); - for (j=1,j2=0; j<=max_img_width; j<<=1,j2++) + for (j = 1, j2 = 0; j <= max_img_width; j <<= 1, j2++) { img_width = (1 << j2); @@ -169,8 +171,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que if (!streams[0]) { log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -183,8 +185,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que { log_error("create_image_2d failed. 
width = %d, height = %d\n", img_width, img_height); clReleaseMemObject(streams[0]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -197,8 +199,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que log_error("clWriteImage failed\n"); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -211,8 +213,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que log_error("clSetKernelArgs failed\n"); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -228,8 +230,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que img_width, img_height); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -241,8 +243,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que img_width, img_height); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -259,8 +261,8 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que } // cleanup - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); clReleaseSampler(sampler); clReleaseKernel(kernel); @@ -274,18 +276,18 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que int test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { - cl_mem streams[2]; - cl_image_format img_format; - unsigned char *input_ptr, *output_ptr; 
- cl_program program; - cl_kernel kernel; - size_t threads[2], local_threads[2]; - cl_ulong max_mem_size; - int img_width, max_img_width; - int img_height, max_img_height; - int max_img_dim; - int i, j, i2, j2, err=0; - size_t max_image2d_width, max_image2d_height; + cl_mem streams[2]; + cl_image_format img_format; + unsigned char *input_ptr, *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2], local_threads[2]; + cl_ulong max_mem_size; + size_t img_width, max_img_width; + size_t img_height, max_img_height; + size_t max_img_dim; + int i, j, i2, j2, err = 0; + size_t max_image2d_width, max_image2d_height; int total_errors = 0; size_t max_local_workgroup_size[3]; MTdata d; @@ -365,10 +367,10 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue d = init_genrand( gRandomSeed ); input_ptr = generate_8888_image(max_img_width, max_img_height, d); - output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + output_ptr = new unsigned char[4 * max_img_width * max_img_height]; int plus_minus; - for (plus_minus=0; plus_minus < 3; plus_minus++) + for (plus_minus = 0; plus_minus < 3; plus_minus++) { // test power of 2 width, height starting at 1 to 4K @@ -379,8 +381,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue { img_width = (1 << j2); - int effective_img_height = img_height; - int effective_img_width = img_width; + size_t effective_img_height = img_height; + size_t effective_img_width = img_width; local_threads[0] = 1; local_threads[1] = 1; @@ -414,8 +416,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue if (!streams[0]) { log_error("create_image_2d failed. 
width = %d, height = %d\n", effective_img_width, effective_img_height); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -428,8 +430,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue { log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height); clReleaseMemObject(streams[0]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -442,8 +444,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue log_error("clWriteImage failed\n"); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -456,8 +458,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue log_error("clSetKernelArgs failed\n"); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -474,8 +476,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -487,8 +489,8 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); + delete[] input_ptr; + delete[] output_ptr; free_mtdata(d); return -1; } @@ -506,15 +508,15 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, 
cl_command_queue } - // cleanup - free(input_ptr); - free(output_ptr); - free_mtdata(d); - clReleaseSampler(sampler); - clReleaseKernel(kernel); - clReleaseProgram(program); + // cleanup + delete[] input_ptr; + delete[] output_ptr; + free_mtdata(d); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); - return total_errors; + return total_errors; } diff --git a/test_conformance/basic/test_simple_image_pitch.cpp b/test_conformance/basic/test_simple_image_pitch.cpp index 1cd82b6f8d..2eb43b3a51 100644 --- a/test_conformance/basic/test_simple_image_pitch.cpp +++ b/test_conformance/basic/test_simple_image_pitch.cpp @@ -83,7 +83,7 @@ int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements) @@ -149,5 +149,5 @@ int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, c free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index 66a6c563cf..6b1ddb56bd 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -292,11 +292,11 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, continue; } - if( gIsEmbedded && - 0 == strcmp(other_types[i], "image3d_t") && - checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + if (0 == strcmp(other_types[i], "image3d_t") + && checkFor3DImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info("\n3D images are not supported by this device. Skipping test.\t"); + log_info("\n3D images are not supported by this device. 
" + "Skipping test.\t"); continue; } diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp index 67bf753754..884bcf36ff 100644 --- a/test_conformance/basic/test_vector_swizzle.cpp +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -94,11 +94,17 @@ __kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { int index = 0; // lvalue swizzles - dst[index++].x = value.x; - dst[index++].y = value.x; - dst[index++].z = value.x; - dst[index++].xyz = value; - dst[index++].zyx = value; + TYPE t; + t = dst[index]; t.x = value.x; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.y = value.x; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.z = value.x; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.xyz = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.zyx = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); // rvalue swizzles vstore3(value.x, 0, (__global BASETYPE*)(dst + index++)); @@ -114,11 +120,17 @@ __kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { int index = 0; // lvalue swizzles - dst[index++].r = value.r; - dst[index++].g = value.r; - dst[index++].b = value.r; - dst[index++].rgb = value; - dst[index++].bgr = value; + TYPE t; + t = dst[index]; t.r = value.r; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.g = value.r; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.b = value.r; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.rgb = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.bgr = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); // rvalue swizzles vstore3(value.r, 0, (__global BASETYPE*)(dst + index++)); @@ -134,11 +146,17 @@ __kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { int index = 0; // lvalue swizzles - 
dst[index++].s0 = value.s0; - dst[index++].s1 = value.s0; - dst[index++].s2 = value.s0; - dst[index++].s012 = value; - dst[index++].s210 = value; + TYPE t; + t = dst[index]; t.s0 = value.s0; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.s1 = value.s0; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.s2 = value.s0; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.s012 = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); + t = dst[index]; t.s210 = value; + vstore3(t, 0, (__global BASETYPE*)(dst + index++)); // rvalue swizzles vstore3(value.s0, 0, (__global BASETYPE*)(dst + index++)); @@ -592,9 +610,6 @@ static int test_vectype(const char* type_name, cl_device_id device, cl_int error = CL_SUCCESS; int result = TEST_PASS; - clProgramWrapper program; - clKernelWrapper kernel; - std::string buildOptions{ "-DTYPE=" }; buildOptions += type_name; buildOptions += std::to_string(N); @@ -610,35 +625,50 @@ static int test_vectype(const char* type_name, cl_device_id device, makeReference(reference); // XYZW swizzles: + { + clProgramWrapper program; + clKernelWrapper kernel; - const char* xyzw_source = TestInfo::kernel_source_xyzw; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", - buildOptions.c_str()); - test_error(error, "Unable to create xyzw test kernel"); + const char* xyzw_source = TestInfo::kernel_source_xyzw; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &xyzw_source, + "test_vector_swizzle_xyzw", buildOptions.c_str()); + test_error(error, "Unable to create xyzw test kernel"); - result |= test_vectype_case(value, reference, context, kernel, queue); + result |= test_vectype_case(value, reference, context, kernel, queue); + } // sN swizzles: - const char* sN_source = TestInfo::kernel_source_sN; - error = create_single_kernel_helper(context, &program, &kernel, 1, - &sN_source, 
"test_vector_swizzle_sN", - buildOptions.c_str()); - test_error(error, "Unable to create sN test kernel"); + { + clProgramWrapper program; + clKernelWrapper kernel; - result |= test_vectype_case(value, reference, context, kernel, queue); + const char* sN_source = TestInfo::kernel_source_sN; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN", + buildOptions.c_str()); + test_error(error, "Unable to create sN test kernel"); + + result |= test_vectype_case(value, reference, context, kernel, queue); + } // RGBA swizzles for OpenCL 3.0 and newer: - const Version device_version = get_device_cl_version(device); - if (device_version >= Version(3, 0)) { - const char* rgba_source = TestInfo::kernel_source_rgba; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &rgba_source, - "test_vector_swizzle_rgba", buildOptions.c_str()); - test_error(error, "Unable to create rgba test kernel"); + clProgramWrapper program; + clKernelWrapper kernel; - result |= test_vectype_case(value, reference, context, kernel, queue); + const Version device_version = get_device_cl_version(device); + if (device_version >= Version(3, 0)) + { + const char* rgba_source = TestInfo::kernel_source_rgba; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &rgba_source, + "test_vector_swizzle_rgba", buildOptions.c_str()); + test_error(error, "Unable to create rgba test kernel"); + + result |= + test_vectype_case(value, reference, context, kernel, queue); + } } return result; diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp index 5c1dd48e57..92079794fd 100644 --- a/test_conformance/buffers/test_buffer_fill.cpp +++ b/test_conformance/buffers/test_buffer_fill.cpp @@ -562,15 +562,13 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t 
fill_elements, const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) ) { - cl_mem buffers[10]; void *outptr[5]; - cl_program program[5]; - cl_kernel kernel[5]; - cl_event event[2]; + clProgramWrapper program[5]; + clKernelWrapper kernel[5]; size_t ptrSizes[5]; size_t global_work_size[3]; int err; - int i, ii; + int i; int src_flag_id; int total_errors = 0; @@ -584,23 +582,39 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu ptrSizes[3] = ptrSizes[2] << 1; ptrSizes[4] = ptrSizes[3] << 1; - for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { - log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]); + loops = (loops < 5 ? loops : 5); + for (i = 0; i < loops; i++) + { + err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, + &kernelCode[i], kernelName[i]); + if (err) + { + log_error(" Error creating program for %s\n", type); + return -1; + } - loops = ( loops < 5 ? loops : 5 ); - for ( i = 0; i < loops; i++ ){ - ii = i << 1; + for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) + { + clEventWrapper event[2]; + clMemWrapper buffers[2]; if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) - buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, hostptr[i], &err); + buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], + ptrSizes[i] * num_elements, + hostptr[i], &err); else - buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err); - if ( !buffers[ii] || err){ + buffers[0] = + clCreateBuffer(context, flag_set[src_flag_id], + ptrSizes[i] * num_elements, NULL, &err); + if (!buffers[0] || err) + { print_error(err, "clCreateBuffer failed\n" ); return -1; } // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region. 
if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) { - err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, hostptr[i], 0, NULL, NULL); + err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, + ptrSizes[i] * num_elements, + hostptr[i], 0, NULL, NULL); if ( err != CL_SUCCESS ){ print_error(err, "clEnqueueWriteBuffer failed\n" ); return -1; @@ -609,45 +623,33 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); memset(outptr[i], 0, ptrSizes[i] * num_elements); - buffers[ii+1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSizes[i] * num_elements, outptr[i], &err); - if ( !buffers[ii+1] || err){ + buffers[1] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + ptrSizes[i] * num_elements, outptr[i], &err); + if (!buffers[1] || err) + { print_error(err, "clCreateBuffer failed\n" ); - clReleaseMemObject( buffers[ii] ); align_free( outptr[i] ); return -1; } - err = clEnqueueFillBuffer(queue, buffers[ii], pattern[i], ptrSizes[i], - ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements, - 0, NULL, &(event[0])); - /* uncomment for test debugging - err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0])); - */ + err = clEnqueueFillBuffer( + queue, buffers[0], pattern[i], ptrSizes[i], + ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements, 0, + NULL, &(event[0])); + if ( err != CL_SUCCESS ){ print_error( err, " clEnqueueFillBuffer failed" ); - clReleaseMemObject( buffers[ii] ); - clReleaseMemObject( buffers[ii+1] ); - align_free( outptr[i] ); - return -1; - } - - err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] ); - if ( err ){ - log_error( " Error creating program for %s\n", type ); - clReleaseMemObject( buffers[ii] ); - clReleaseMemObject( buffers[ii+1] ); 
align_free( outptr[i] ); return -1; } - err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] ); - err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] ); + err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), + (void *)&buffers[0]); + err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem), + (void *)&buffers[1]); if ( err != CL_SUCCESS ){ print_error( err, "clSetKernelArg failed" ); - clReleaseKernel( kernel[i] ); - clReleaseProgram( program[i] ); - clReleaseMemObject( buffers[ii] ); - clReleaseMemObject( buffers[ii+1] ); align_free( outptr[i] ); return -1; } @@ -655,14 +657,9 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu err = clWaitForEvents( 1, &(event[0]) ); if ( err != CL_SUCCESS ){ print_error( err, "clWaitForEvents() failed" ); - clReleaseKernel( kernel[i] ); - clReleaseProgram( program[i] ); - clReleaseMemObject( buffers[ii] ); - clReleaseMemObject( buffers[ii+1] ); align_free( outptr[i] ); return -1; } - clReleaseEvent(event[0]); err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); if (err != CL_SUCCESS){ @@ -670,7 +667,9 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu return -1; } - err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) ); + err = clEnqueueReadBuffer(queue, buffers[1], false, 0, + ptrSizes[i] * num_elements, outptr[i], 0, + NULL, &(event[1])); if (err != CL_SUCCESS){ print_error( err, "clEnqueueReadBuffer failed" ); return -1; @@ -680,21 +679,18 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu if ( err != CL_SUCCESS ){ print_error( err, "clWaitForEvents() failed" ); } - clReleaseEvent(event[1]); if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){ - log_error( " %s%d test failed\n", type, 1<a = (cl_int)genrand_int32(d); - pattern->b = 
(cl_float)get_random_float( -FLT_MAX, FLT_MAX, d ); - inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment); - for ( j = 0; j < offset_elements; j++ ) { - inptr[j].a = 0; - inptr[j].b =0; - } - for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) { - inptr[j].a = pattern->a; - inptr[j].b = pattern->b; - } - for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) { - inptr[j].a = 0; - inptr[j].b = 0; + for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) + { + clProgramWrapper program; + clKernelWrapper kernel; + log_info("Testing with cl_mem_flags: %s\n", + flag_set_names[src_flag_id]); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &struct_kernel_code, + "read_fill_struct"); + if (err) + { + log_error(" Error creating program for struct\n"); + free_mtdata(d); + return -1; } - hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment); - memset(hostptr, 0, ptrSize * num_elements); + // Test with random offsets and fill sizes + for (n = 0; n < 8; n++) + { + clEventWrapper event[2]; + clMemWrapper buffers[2]; + void *outptr; + TestStruct *inptr; + TestStruct *hostptr; + + offset_elements = + (size_t)get_random_float(0.f, (float)(num_elements - 8), d); + fill_elements = (size_t)get_random_float( + 8.f, (float)(num_elements - offset_elements), d); + log_info("Testing random fill from offset %d for %d elements: \n", + (int)offset_elements, (int)fill_elements); + + pattern.a = (cl_int)genrand_int32(d); + pattern.b = (cl_float)get_random_float(-FLT_MAX, FLT_MAX, d); + + inptr = (TestStruct *)align_malloc(ptrSize * num_elements, + min_alignment); + for (j = 0; j < offset_elements; j++) + { + inptr[j].a = 0; + inptr[j].b = 0; + } + for (j = offset_elements; j < offset_elements + fill_elements; j++) + { + inptr[j].a = pattern.a; + inptr[j].b = pattern.b; + } + for (j = offset_elements + fill_elements; j < (size_t)num_elements; + j++) + { + inptr[j].a = 0; + inptr[j].b = 
0; + } - for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { - log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]); + hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, + min_alignment); + memset(hostptr, 0, ptrSize * num_elements); if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, hostptr, &err); @@ -762,9 +781,6 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, NULL, &err); if ( err ){ print_error(err, " clCreateBuffer failed\n" ); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -774,9 +790,6 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL); if ( err != CL_SUCCESS ){ print_error(err, " clEnqueueWriteBuffer failed\n" ); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -789,45 +802,21 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma if ( ! 
buffers[1] || err){ print_error(err, " clCreateBuffer failed\n" ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); return -1; } - err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize, - ptrSize * offset_elements, ptrSize * fill_elements, - 0, NULL, &(event[0])); + err = clEnqueueFillBuffer( + queue, buffers[0], &pattern, ptrSize, ptrSize * offset_elements, + ptrSize * fill_elements, 0, NULL, &(event[0])); /* uncomment for test debugging err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0])); */ if ( err != CL_SUCCESS ){ print_error( err, " clEnqueueFillBuffer failed" ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); - align_free( (void *)inptr ); - align_free( (void *)hostptr ); - free_mtdata(d); - return -1; - } - - err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" ); - if ( err ){ - log_error( " Error creating program for struct\n" ); - align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -838,14 +827,7 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] ); if ( err != CL_SUCCESS ){ print_error( err, " clSetKernelArg failed" ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void 
*)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -855,32 +837,17 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma err = clWaitForEvents( 1, &(event[0]) ); if ( err != CL_SUCCESS ){ print_error( err, "clWaitForEvents() failed" ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); return -1; } - clReleaseEvent( event[0] ); err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL ); if ( err != CL_SUCCESS ){ print_error( err, " clEnqueueNDRangeKernel failed" ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -890,14 +857,7 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) ); if ( err != CL_SUCCESS ){ print_error( err, " clEnqueueReadBuffer failed" ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); align_free( outptr ); - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseEvent( event[0] ); - clReleaseEvent( event[1] ); - free( (void *)pattern ); align_free( (void *)inptr ); align_free( (void *)hostptr ); free_mtdata(d); @@ -908,7 +868,6 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma if ( err != CL_SUCCESS ){ print_error( err, "clWaitForEvents() failed" ); } - clReleaseEvent( event[1] ); if ( verify_fill_struct( inptr, 
outptr, num_elements) ) { log_error( " buffer_FILL async struct test failed\n" ); @@ -918,15 +877,10 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma log_info( " buffer_FILL async struct test passed\n" ); } // cleanup - clReleaseKernel( kernel ); - clReleaseProgram( program ); align_free( outptr ); - clReleaseMemObject( buffers[0] ); - clReleaseMemObject( buffers[1] ); + align_free((void *)inptr); + align_free((void *)hostptr); } // src cl_mem_flag - free( (void *)pattern ); - align_free( (void *)inptr ); - align_free( (void *)hostptr ); } free_mtdata(d); diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp index f0363dd5b5..382c7a3516 100644 --- a/test_conformance/buffers/test_buffer_map.cpp +++ b/test_conformance/buffers/test_buffer_map.cpp @@ -554,10 +554,9 @@ static int verify_read_struct( void *ptr, int n ) static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops, const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) ) { - cl_mem buffers[5]; void *outptr[5]; - cl_program program[5]; - cl_kernel kernel[5]; + clProgramWrapper program[5]; + clKernelWrapper kernel[5]; size_t threads[3], localThreads[3]; cl_int err; int i; @@ -580,10 +579,20 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c if (! 
gHasLong && strstr(type,"long")) return 0; - for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + for (i = 0; i < loops; i++) + { - for ( i = 0; i < loops; i++ ){ + err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, + &kernelCode[i], kernelName[i]); + if (err) + { + log_error(" Error creating program for %s\n", type); + return -1; + } + + for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) + { + clMemWrapper buffer; outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ log_error( " unable to allocate %d bytes of memory\n", (int)ptrSizes[i] * num_elements ); @@ -591,31 +600,24 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c } if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) - buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, outptr[i], &err); + buffer = + clCreateBuffer(context, flag_set[src_flag_id], + ptrSizes[i] * num_elements, outptr[i], &err); else - buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err); + buffer = clCreateBuffer(context, flag_set[src_flag_id], + ptrSizes[i] * num_elements, NULL, &err); - if (!buffers[i] || err) + if (!buffer || err) { print_error(err, "clCreateBuffer failed\n" ); align_free( outptr[i] ); return -1; } - err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] ); - if ( err ){ - log_error( " Error creating program for %s\n", type ); - clReleaseMemObject( buffers[i] ); - align_free( outptr[i] ); - return -1; - } + err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer); - err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] ); if ( err != CL_SUCCESS ){ print_error( err, "clSetKernelArg failed\n" ); - clReleaseKernel( kernel[i] ); - 
clReleaseProgram( program[i] ); - clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); return -1; } @@ -628,39 +630,34 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); if ( err != CL_SUCCESS ){ print_error( err, "clEnqueueNDRangeKernel failed\n" ); - clReleaseKernel( kernel[i] ); - clReleaseProgram( program[i] ); - clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); return -1; } - mappedPtr = clEnqueueMapBuffer(queue, buffers[i], CL_TRUE, CL_MAP_READ, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err); - if ( err != CL_SUCCESS ){ + mappedPtr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ, + 0, ptrSizes[i] * num_elements, 0, + NULL, NULL, &err); + if (err != CL_SUCCESS) + { print_error( err, "clEnqueueMapBuffer failed" ); - clReleaseKernel( kernel[i] ); - clReleaseProgram( program[i] ); - clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); return -1; } if (fn(mappedPtr, num_elements*(1< + // Design: // To test sub buffers, we first create one main buffer. We then create several sub-buffers and // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of @@ -39,7 +41,8 @@ class SubBufferWrapper : public clMemWrapper region.size = mSize; cl_int error; - mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + reset(clCreateSubBuffer(mParentBuffer, flags, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error)); return error; } }; @@ -100,13 +103,6 @@ class ReadWriteAction : public Action } }; -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? 
(_a) : (_b) ) -#endif - class CopyAction : public Action { public: @@ -116,7 +112,8 @@ class CopyAction : public Action virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) { // Copy from sub-buffer 1 to sub-buffer 2 - size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() ); + size_t size = get_random_size_t( + 0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed()); size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() ); @@ -265,7 +262,11 @@ int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queue endRange = mainSize; size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign; - size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign; + size_t size = + get_random_size_t( + 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign, + Action::GetRandSeed()) + * addressAlign; error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size ); test_error( error, "Unable to allocate sub buffer" ); @@ -442,7 +443,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = get_reasonable_buffer_size( otherDevice, maxBuffer2 ); test_error( error, "Unable to get buffer size for secondary device" ); - maxBuffer1 = MIN( maxBuffer1, maxBuffer2 ); + maxBuffer1 = std::min(maxBuffer1, maxBuffer2); cl_uint addressAlign1Bits, addressAlign2Bits; error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL ); @@ -451,7 +452,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = clGetDeviceInfo( otherDevice, 
CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL ); test_error( error, "Unable to get secondary device's address alignment" ); - cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8; + cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8; // Finally time to run! return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 ); diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index bbcc68c657..d30259f0f1 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -28,10 +28,9 @@ #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() -#define EXECUTE_TEST(error, test)\ - error |= test;\ - if(error && !gContinueOnError)\ - return error; +#define EXECUTE_TEST(error, test) \ + error |= test; \ + if (error && !gContinueOnError) return error; enum TExplicitAtomicType { @@ -57,764 +56,918 @@ enum TExplicitMemoryScopeType MEMORY_SCOPE_ALL_SVM_DEVICES }; -extern bool gHost; // temporary flag for testing native host threads (test verification) +extern bool + gHost; // temporary flag for testing native host threads (test verification) extern bool gOldAPI; // temporary flag for testing with old API (OpenCL 1.2) extern bool gContinueOnError; // execute all cases even when errors detected -extern bool gNoGlobalVariables; // disable cases with global atomics in program scope +extern bool + gNoGlobalVariables; // disable cases with global atomics in program scope extern bool gNoGenericAddressSpace; // disable cases with generic address space extern bool gUseHostPtr; // use malloc/free instead of clSVMAlloc/clSVMFree extern bool gDebug; // print OpenCL kernel code -extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity -extern int gMaxDeviceThreads; // maximum number of threads executed on 
OCL device +extern int gInternalIterations; // internal test iterations for atomic + // operation, sufficient to verify atomicity +extern int + gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device -extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType); -extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); +extern const char * +get_memory_order_type_name(TExplicitMemoryOrderType orderType); +extern const char * +get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); extern cl_int getSupportedMemoryOrdersAndScopes( cl_device_id device, std::vector &memoryOrders, std::vector &memoryScopes); -class AtomicTypeInfo -{ +class AtomicTypeInfo { public: - TExplicitAtomicType _type; - AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} - cl_uint Size(cl_device_id device); - const char* AtomicTypeName(); - const char* RegularTypeName(); - const char* AddSubOperandTypeName(); - int IsSupported(cl_device_id device); + TExplicitAtomicType _type; + AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} + cl_uint Size(cl_device_id device); + const char *AtomicTypeName(); + const char *RegularTypeName(); + const char *AddSubOperandTypeName(); + int IsSupported(cl_device_id device); }; -template -class AtomicTypeExtendedInfo : public AtomicTypeInfo -{ +template +class AtomicTypeExtendedInfo : public AtomicTypeInfo { public: - AtomicTypeExtendedInfo(TExplicitAtomicType type) : AtomicTypeInfo(type) {} - HostDataType MinValue(); - HostDataType MaxValue(); - HostDataType SpecialValue(cl_uchar x) - { - HostDataType tmp; - cl_uchar *ptr = (cl_uchar*)&tmp; - for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_uchar); i++) - ptr[i] = x; - return tmp; - } - HostDataType SpecialValue(cl_ushort x) - { - HostDataType tmp; - cl_ushort *ptr = (cl_ushort*)&tmp; - for(cl_uint i = 0; i 
< sizeof(HostDataType)/sizeof(cl_ushort); i++) - ptr[i] = x; - return tmp; - } + AtomicTypeExtendedInfo(TExplicitAtomicType type): AtomicTypeInfo(type) {} + HostDataType MinValue(); + HostDataType MaxValue(); + HostDataType SpecialValue(cl_uchar x) + { + HostDataType tmp; + cl_uchar *ptr = (cl_uchar *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_uchar); i++) + ptr[i] = x; + return tmp; + } + HostDataType SpecialValue(cl_ushort x) + { + HostDataType tmp; + cl_ushort *ptr = (cl_ushort *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_ushort); i++) + ptr[i] = x; + return tmp; + } }; -class CTest { +class CTest { public: - virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0; + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) = 0; }; -template -class CBasicTest : CTest -{ +template +class CBasicTest : CTest { public: - typedef struct { - CBasicTest *test; - cl_uint tid; - cl_uint threadCount; - volatile HostAtomicType *destMemory; - HostDataType *oldValues; - } THostThreadContext; - static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, void *userInfo) - { - THostThreadContext *threadContext = ((THostThreadContext*)userInfo)+job_id; - threadContext->test->HostFunction(threadContext->tid, threadContext->threadCount, threadContext->destMemory, threadContext->oldValues); - return 0; - } - CBasicTest(TExplicitAtomicType dataType, bool useSVM) : CTest(), - _maxDeviceThreads(MAX_DEVICE_THREADS), - _dataType(dataType), _useSVM(useSVM), _startValue(255), - _localMemory(false), _declaredInProgram(false), - _usedInFunction(false), _genericAddrSpace(false), - _oldValueCheck(true), _localRefValues(false), - _maxGroupSize(0), _passCount(0), _iterations(gInternalIterations) - { - } - virtual ~CBasicTest() - { - if(_passCount) - log_info(" %u tests executed successfully for %s\n", _passCount, DataType().AtomicTypeName()); 
- } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - return 1; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - return false; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - return false; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - return false; - } - virtual std::string PragmaHeader(cl_device_id deviceID); - virtual std::string ProgramHeader(cl_uint maxNumDestItems); - virtual std::string FunctionCode(); - virtual std::string KernelCode(cl_uint maxNumDestItems); - virtual std::string ProgramCore() = 0; - virtual std::string SingleTestName() - { - std::string testName = LocalMemory() ? "local" : "global"; - testName += " "; - testName += DataType().AtomicTypeName(); - if(DeclaredInProgram()) - { - testName += " declared in program"; - } - if(DeclaredInProgram() && UsedInFunction()) - testName += ","; - if(UsedInFunction()) - { - testName += " used in "; - if(GenericAddrSpace()) - testName += "generic "; - testName += "function"; - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue); - int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - UsedInFunction(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - UsedInFunction(true); - GenericAddrSpace(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(true); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(false); - return error; - } - int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - 
DeclaredInProgram(false); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - if(!UseSVM()) - { - DeclaredInProgram(true); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - } - return error; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - if(_maxDeviceThreads > 0 && !UseSVM()) - { - LocalMemory(true); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } - if(_maxDeviceThreads+MaxHostThreads() > 0) - { - LocalMemory(false); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } - return error; - } - virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) - { - if(sizeof(HostAtomicType) != DataType().Size(deviceID)) - { - log_info("Invalid test: Host atomic type size (%u) is different than OpenCL type size (%u)\n", (cl_uint)sizeof(HostAtomicType), DataType().Size(deviceID)); - return -1; - } - if(sizeof(HostAtomicType) != sizeof(HostDataType)) - { - log_info("Invalid test: Host atomic type size (%u) is different than corresponding type size (%u)\n", (cl_uint)sizeof(HostAtomicType), (cl_uint)sizeof(HostDataType)); - return -1; - } - // Verify we can run first - if(UseSVM() && !gUseHostPtr) - { - cl_device_svm_capabilities caps; - cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, 0); - test_error(error, "clGetDeviceInfo failed"); - if((caps & CL_DEVICE_SVM_ATOMICS) == 0) - { - log_info("\t%s - SVM_ATOMICS not supported\n", DataType().AtomicTypeName()); - // implicit pass + typedef struct + { + CBasicTest *test; + cl_uint tid; + cl_uint threadCount; + volatile HostAtomicType *destMemory; + HostDataType *oldValues; + } THostThreadContext; + static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, + void *userInfo) + { + THostThreadContext *threadContext = + 
((THostThreadContext *)userInfo) + job_id; + threadContext->test->HostFunction( + threadContext->tid, threadContext->threadCount, + threadContext->destMemory, threadContext->oldValues); return 0; - } } - if(!DataType().IsSupported(deviceID)) + CBasicTest(TExplicitAtomicType dataType, bool useSVM) + : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType), + _useSVM(useSVM), _startValue(255), _localMemory(false), + _declaredInProgram(false), _usedInFunction(false), + _genericAddrSpace(false), _oldValueCheck(true), + _localRefValues(false), _maxGroupSize(0), _passCount(0), + _iterations(gInternalIterations) + {} + virtual ~CBasicTest() + { + if (_passCount) + log_info(" %u tests executed successfully for %s\n", _passCount, + DataType().AtomicTypeName()); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1; + } + virtual cl_uint NumNonAtomicVariablesPerThread() { return 1; } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + return false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + return false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) { - log_info("\t%s not supported\n", DataType().AtomicTypeName()); - // implicit pass or host test (debug feature) - if(UseSVM()) + return false; + } + virtual std::string PragmaHeader(cl_device_id deviceID); + virtual std::string ProgramHeader(cl_uint maxNumDestItems); + virtual std::string FunctionCode(); + virtual std::string KernelCode(cl_uint maxNumDestItems); + virtual std::string ProgramCore() = 0; + virtual std::string SingleTestName() + { + std::string testName = LocalMemory() ? 
"local" : "global"; + testName += " "; + testName += DataType().AtomicTypeName(); + if (DeclaredInProgram()) + { + testName += " declared in program"; + } + if (DeclaredInProgram() && UsedInFunction()) testName += ","; + if (UsedInFunction()) + { + testName += " used in "; + if (GenericAddrSpace()) testName += "generic "; + testName += "function"; + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue); + int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + UsedInFunction(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + UsedInFunction(true); + GenericAddrSpace(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(true); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(false); + return error; + } + int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + DeclaredInProgram(false); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + if (!UseSVM()) + { + DeclaredInProgram(true); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + } + return error; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + if (_maxDeviceThreads > 0 && !UseSVM()) + { + LocalMemory(true); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + if (_maxDeviceThreads + MaxHostThreads() > 0) + { + LocalMemory(false); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + return error; + } + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) + { + if (sizeof(HostAtomicType) != DataType().Size(deviceID)) + { + log_info("Invalid 
test: Host atomic type size (%u) is different " + "than OpenCL type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + DataType().Size(deviceID)); + return -1; + } + if (sizeof(HostAtomicType) != sizeof(HostDataType)) + { + log_info("Invalid test: Host atomic type size (%u) is different " + "than corresponding type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + (cl_uint)sizeof(HostDataType)); + return -1; + } + // Verify we can run first + if (UseSVM() && !gUseHostPtr) + { + cl_device_svm_capabilities caps; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, + sizeof(caps), &caps, 0); + test_error(error, "clGetDeviceInfo failed"); + if ((caps & CL_DEVICE_SVM_ATOMICS) == 0) + { + log_info("\t%s - SVM_ATOMICS not supported\n", + DataType().AtomicTypeName()); + // implicit pass + return 0; + } + } + if (!DataType().IsSupported(deviceID)) + { + log_info("\t%s not supported\n", DataType().AtomicTypeName()); + // implicit pass or host test (debug feature) + if (UseSVM()) return 0; + _maxDeviceThreads = 0; + } + if (_maxDeviceThreads + MaxHostThreads() == 0) return 0; + return ExecuteForEachParameterSet(deviceID, context, queue); + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + log_info("Empty thread function %u\n", (cl_uint)tid); + } + AtomicTypeExtendedInfo DataType() const + { + return AtomicTypeExtendedInfo(_dataType); + } + cl_uint _maxDeviceThreads; + virtual cl_uint MaxHostThreads() + { + if (UseSVM() || gHost) + return MAX_HOST_THREADS; + else + return 0; + } + + int CheckCapabilities(TExplicitMemoryScopeType memoryScope, + TExplicitMemoryOrderType memoryOrder) + { + /* + Differentiation between atomic fence and other atomic operations + does not need to occur here. + + The initialisation of this test checks that the minimum required + capabilities are supported by this device. 
+ + The following switches allow the test to skip if optional + capabilites are not supported by the device. + */ + switch (memoryScope) + { + case MEMORY_SCOPE_EMPTY: { + break; + } + case MEMORY_SCOPE_WORK_GROUP: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_DEVICE: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_ALL_DEVICES: // fallthough + case MEMORY_SCOPE_ALL_SVM_DEVICES: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory scope\n"); + break; + } + } + + switch (memoryOrder) + { + case MEMORY_ORDER_EMPTY: { + break; + } + case MEMORY_ORDER_RELAXED: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_ACQUIRE: + case MEMORY_ORDER_RELEASE: + case MEMORY_ORDER_ACQ_REL: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_SEQ_CST: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory order\n"); + break; + } + } + return 0; - _maxDeviceThreads = 0; - } - if(_maxDeviceThreads+MaxHostThreads() == 0) - return 0; - return ExecuteForEachParameterSet(deviceID, context, queue); - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - log_info("Empty thread function %u\n", (cl_uint)tid); - } - AtomicTypeExtendedInfo DataType() const - { - return AtomicTypeExtendedInfo(_dataType); - } - cl_uint _maxDeviceThreads; - virtual cl_uint MaxHostThreads() - { - if(UseSVM() || gHost) - return MAX_HOST_THREADS; - else - return 0; - } - - int 
CheckCapabilities(TExplicitMemoryScopeType memoryScope, - TExplicitMemoryOrderType memoryOrder) - { - /* - Differentiation between atomic fence and other atomic operations - does not need to occur here. - - The initialisation of this test checks that the minimum required - capabilities are supported by this device. - - The following switches allow the test to skip if optional capabilites - are not supported by the device. - */ - switch (memoryScope) - { - case MEMORY_SCOPE_EMPTY: { - break; - } - case MEMORY_SCOPE_WORK_GROUP: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_DEVICE: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_ALL_DEVICES: // fallthough - case MEMORY_SCOPE_ALL_SVM_DEVICES: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory scope\n"); - break; - } - } - - switch (memoryOrder) - { - case MEMORY_ORDER_EMPTY: { - break; - } - case MEMORY_ORDER_RELAXED: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_ACQUIRE: - case MEMORY_ORDER_RELEASE: - case MEMORY_ORDER_ACQ_REL: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_SEQ_CST: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory order\n"); - break; - } - } - - return 0; - } - virtual bool SVMDataBufferAllSVMConsistent() {return false;} - bool UseSVM() {return _useSVM;} - void StartValue(HostDataType startValue) {_startValue = startValue;} - HostDataType StartValue() {return _startValue;} - void LocalMemory(bool local) {_localMemory = local;} - bool LocalMemory() 
{return _localMemory;} - void DeclaredInProgram(bool declaredInProgram) {_declaredInProgram = declaredInProgram;} - bool DeclaredInProgram() {return _declaredInProgram;} - void UsedInFunction(bool local) {_usedInFunction = local;} - bool UsedInFunction() {return _usedInFunction;} - void GenericAddrSpace(bool genericAddrSpace) {_genericAddrSpace = genericAddrSpace;} - bool GenericAddrSpace() {return _genericAddrSpace;} - void OldValueCheck(bool check) {_oldValueCheck = check;} - bool OldValueCheck() {return _oldValueCheck;} - void LocalRefValues(bool localRefValues) {_localRefValues = localRefValues;} - bool LocalRefValues() {return _localRefValues;} - void MaxGroupSize(cl_uint maxGroupSize) {_maxGroupSize = maxGroupSize;} - cl_uint MaxGroupSize() {return _maxGroupSize;} - void CurrentGroupSize(cl_uint currentGroupSize) - { - if(MaxGroupSize() && MaxGroupSize() < currentGroupSize) - _currentGroupSize = MaxGroupSize(); - else - _currentGroupSize = currentGroupSize; - } - cl_uint CurrentGroupSize() {return _currentGroupSize;} - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(threadCount == 0) - return 0; - if(LocalMemory()) - return 1; - return threadCount/CurrentGroupSize(); - } - cl_int Iterations() {return _iterations;} - std::string IterationsStr() {std::stringstream ss; ss << _iterations; return ss.str();} + } + virtual bool SVMDataBufferAllSVMConsistent() { return false; } + bool UseSVM() { return _useSVM; } + void StartValue(HostDataType startValue) { _startValue = startValue; } + HostDataType StartValue() { return _startValue; } + void LocalMemory(bool local) { _localMemory = local; } + bool LocalMemory() { return _localMemory; } + void DeclaredInProgram(bool declaredInProgram) + { + _declaredInProgram = declaredInProgram; + } + bool DeclaredInProgram() { return _declaredInProgram; } + void UsedInFunction(bool local) { _usedInFunction = local; } + bool UsedInFunction() { return _usedInFunction; } + void GenericAddrSpace(bool genericAddrSpace) + { 
+ _genericAddrSpace = genericAddrSpace; + } + bool GenericAddrSpace() { return _genericAddrSpace; } + void OldValueCheck(bool check) { _oldValueCheck = check; } + bool OldValueCheck() { return _oldValueCheck; } + void LocalRefValues(bool localRefValues) + { + _localRefValues = localRefValues; + } + bool LocalRefValues() { return _localRefValues; } + void MaxGroupSize(cl_uint maxGroupSize) { _maxGroupSize = maxGroupSize; } + cl_uint MaxGroupSize() { return _maxGroupSize; } + void CurrentGroupSize(cl_uint currentGroupSize) + { + if (MaxGroupSize() && MaxGroupSize() < currentGroupSize) + _currentGroupSize = MaxGroupSize(); + else + _currentGroupSize = currentGroupSize; + } + cl_uint CurrentGroupSize() { return _currentGroupSize; } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (threadCount == 0) return 0; + if (LocalMemory()) return 1; + return threadCount / CurrentGroupSize(); + } + cl_int Iterations() { return _iterations; } + std::string IterationsStr() + { + std::stringstream ss; + ss << _iterations; + return ss.str(); + } + private: - const TExplicitAtomicType _dataType; - const bool _useSVM; - HostDataType _startValue; - bool _localMemory; - bool _declaredInProgram; - bool _usedInFunction; - bool _genericAddrSpace; - bool _oldValueCheck; - bool _localRefValues; - cl_uint _maxGroupSize; - cl_uint _currentGroupSize; - cl_uint _passCount; - const cl_int _iterations; + const TExplicitAtomicType _dataType; + const bool _useSVM; + HostDataType _startValue; + bool _localMemory; + bool _declaredInProgram; + bool _usedInFunction; + bool _genericAddrSpace; + bool _oldValueCheck; + bool _localRefValues; + cl_uint _maxGroupSize; + cl_uint _currentGroupSize; + cl_uint _passCount; + const cl_int _iterations; }; -template -class CBasicTestMemOrderScope : public CBasicTest -{ +template +class CBasicTestMemOrderScope + : public CBasicTest { public: - using CBasicTest::LocalMemory; - using CBasicTest::MaxGroupSize; - using CBasicTest::CheckCapabilities; - 
CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest(dataType, useSVM) - { - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s"; - header += - "#define atomic_store_explicit(x,y,o"+s+") atomic_store(x,y)\n" - "#define atomic_load_explicit(x,o"+s+") atomic_load(x)\n" - "#define atomic_exchange_explicit(x,y,o"+s+") atomic_exchange(x,y)\n" - "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_strong(x,y,z)\n" - "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_weak(x,y,z)\n" - "#define atomic_fetch_add_explicit(x,y,o"+s+") atomic_fetch_add(x,y)\n" - "#define atomic_fetch_sub_explicit(x,y,o"+s+") atomic_fetch_sub(x,y)\n" - "#define atomic_fetch_or_explicit(x,y,o"+s+") atomic_fetch_or(x,y)\n" - "#define atomic_fetch_xor_explicit(x,y,o"+s+") atomic_fetch_xor(x,y)\n" - "#define atomic_fetch_and_explicit(x,y,o"+s+") atomic_fetch_and(x,y)\n" - "#define atomic_fetch_min_explicit(x,y,o"+s+") atomic_fetch_min(x,y)\n" - "#define atomic_fetch_max_explicit(x,y,o"+s+") atomic_fetch_max(x,y)\n" - "#define atomic_flag_test_and_set_explicit(x,o"+s+") atomic_flag_test_and_set(x)\n" - "#define atomic_flag_clear_explicit(x,o"+s+") atomic_flag_clear(x)\n"; - } - return header+CBasicTest::ProgramHeader(maxNumDestItems); - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - } - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue 
queue) - { - if(LocalMemory() && - MemoryScope() != MEMORY_SCOPE_EMPTY && - MemoryScope() != MEMORY_SCOPE_WORK_GROUP) //memory scope should only be used for global memory - return 0; - if(MemoryScope() == MEMORY_SCOPE_DEVICE) - MaxGroupSize(16); // increase number of groups by forcing smaller group size - else - MaxGroupSize(0); // group size limited by device capabilities - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTest::ExecuteSingleTest(deviceID, context, queue); - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) - { - if(memoryOrder[oi] == MEMORY_ORDER_EMPTY && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; - MemoryOrder(memoryOrder[oi]); - MemoryScope(memoryScope[si]); - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); - } - } - return error; - } - void MemoryOrder(TExplicitMemoryOrderType memoryOrder) {_memoryOrder = memoryOrder;} - TExplicitMemoryOrderType MemoryOrder() {return _memoryOrder;} - std::string MemoryOrderStr() - { - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder()); - return ""; - } - void MemoryScope(TExplicitMemoryScopeType memoryScope) {_memoryScope = memoryScope;} - TExplicitMemoryScopeType MemoryScope() {return _memoryScope;} - std::string MemoryScopeStr() - { - 
if(MemoryScope() != MEMORY_SCOPE_EMPTY) - return std::string(", ")+get_memory_scope_type_name(MemoryScope()); - return ""; - } - std::string MemoryOrderScopeStr() - { - return MemoryOrderStr()+MemoryScopeStr(); - } - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(MemoryScope() == MEMORY_SCOPE_WORK_GROUP) - return 1; - return CBasicTest::CurrentGroupNum(threadCount); - } - virtual cl_uint MaxHostThreads() - { - // block host threads execution for memory scope different than - // memory_scope_all_svm_devices - if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) - { - return CBasicTest::MaxHostThreads(); - } - else - { - return 0; - } - } + using CBasicTest::LocalMemory; + using CBasicTest::MaxGroupSize; + using CBasicTest::CheckCapabilities; + CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTest(dataType, useSVM) + {} + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) + { + std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? 
"" : ",s"; + header += "#define atomic_store_explicit(x,y,o" + s + + ") atomic_store(x,y)\n" + "#define atomic_load_explicit(x,o" + + s + + ") atomic_load(x)\n" + "#define atomic_exchange_explicit(x,y,o" + + s + + ") atomic_exchange(x,y)\n" + "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_strong(x,y,z)\n" + "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_weak(x,y,z)\n" + "#define atomic_fetch_add_explicit(x,y,o" + + s + + ") atomic_fetch_add(x,y)\n" + "#define atomic_fetch_sub_explicit(x,y,o" + + s + + ") atomic_fetch_sub(x,y)\n" + "#define atomic_fetch_or_explicit(x,y,o" + + s + + ") atomic_fetch_or(x,y)\n" + "#define atomic_fetch_xor_explicit(x,y,o" + + s + + ") atomic_fetch_xor(x,y)\n" + "#define atomic_fetch_and_explicit(x,y,o" + + s + + ") atomic_fetch_and(x,y)\n" + "#define atomic_fetch_min_explicit(x,y,o" + + s + + ") atomic_fetch_min(x,y)\n" + "#define atomic_fetch_max_explicit(x,y,o" + + s + + ") atomic_fetch_max(x,y)\n" + "#define atomic_flag_test_and_set_explicit(x,o" + + s + + ") atomic_flag_test_and_set(x)\n" + "#define atomic_flag_clear_explicit(x,o" + + s + ") atomic_flag_clear(x)\n"; + } + return header + + CBasicTest::ProgramHeader( + maxNumDestItems); + } + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + } + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (LocalMemory() && MemoryScope() != MEMORY_SCOPE_EMPTY + && MemoryScope() + != MEMORY_SCOPE_WORK_GROUP) // memory scope should only 
be used + // for global memory + return 0; + if (MemoryScope() == MEMORY_SCOPE_DEVICE) + MaxGroupSize( + 16); // increase number of groups by forcing smaller group size + else + MaxGroupSize(0); // group size limited by device capabilities + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTest::ExecuteSingleTest( + deviceID, context, queue); + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if (memoryOrder[oi] == MEMORY_ORDER_EMPTY + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; + MemoryOrder(memoryOrder[oi]); + MemoryScope(memoryScope[si]); + EXECUTE_TEST( + error, + (CBasicTest:: + ExecuteForEachParameterSet(deviceID, context, queue))); + } + } + return error; + } + void MemoryOrder(TExplicitMemoryOrderType memoryOrder) + { + _memoryOrder = memoryOrder; + } + TExplicitMemoryOrderType MemoryOrder() { return _memoryOrder; } + std::string MemoryOrderStr() + { + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder()); + return ""; + } + void MemoryScope(TExplicitMemoryScopeType memoryScope) + { + _memoryScope = memoryScope; + } + TExplicitMemoryScopeType MemoryScope() { return _memoryScope; } + std::string MemoryScopeStr() + { + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + return std::string(", ") + + 
get_memory_scope_type_name(MemoryScope()); + return ""; + } + std::string MemoryOrderScopeStr() + { + return MemoryOrderStr() + MemoryScopeStr(); + } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (MemoryScope() == MEMORY_SCOPE_WORK_GROUP) return 1; + return CBasicTest::CurrentGroupNum( + threadCount); + } + virtual cl_uint MaxHostThreads() + { + // block host threads execution for memory scope different than + // memory_scope_all_svm_devices + if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) + { + return CBasicTest::MaxHostThreads(); + } + else + { + return 0; + } + } + private: - TExplicitMemoryOrderType _memoryOrder; - TExplicitMemoryScopeType _memoryScope; + TExplicitMemoryOrderType _memoryOrder; + TExplicitMemoryScopeType _memoryScope; }; -template -class CBasicTestMemOrder2Scope : public CBasicTestMemOrderScope -{ +template +class CBasicTestMemOrder2Scope + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - - CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder2())).substr(sizeof("memory")); - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual 
int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + + CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder2())) + .substr(sizeof("memory")); + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some 
orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) { - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryOrder[oi] != memoryOrder[o2i]) - continue; // both memory order arguments must be set (or none) - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; // memory scope without memory order is not allowed - MemoryOrder(memoryOrder[oi]); - MemoryOrder2(memoryOrder[o2i]); - MemoryScope(memoryScope[si]); - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); + for (unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryOrder[oi] != memoryOrder[o2i]) + continue; // both memory order arguments must be set (or + // none) + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; // memory scope without memory order is not + // allowed + MemoryOrder(memoryOrder[oi]); + MemoryOrder2(memoryOrder[o2i]); + MemoryScope(memoryScope[si]); + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + 
EXECUTE_TEST(error, + (CBasicTest:: + ExecuteForEachParameterSet( + deviceID, context, queue))); + } + } } - } - } - return error; - } - void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) {_memoryOrder2 = memoryOrderFail;} - TExplicitMemoryOrderType MemoryOrder2() {return _memoryOrder2;} - std::string MemoryOrderFailStr() - { - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder2()); - return ""; - } - std::string MemoryOrderScope() - { - return MemoryOrderStr()+MemoryOrderFailStr()+MemoryScopeStr(); - } + return error; + } + void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) + { + _memoryOrder2 = memoryOrderFail; + } + TExplicitMemoryOrderType MemoryOrder2() { return _memoryOrder2; } + std::string MemoryOrderFailStr() + { + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder2()); + return ""; + } + std::string MemoryOrderScope() + { + return MemoryOrderStr() + MemoryOrderFailStr() + MemoryScopeStr(); + } + private: - TExplicitMemoryOrderType _memoryOrder2; + TExplicitMemoryOrderType _memoryOrder2; }; -template -std::string CBasicTest::PragmaHeader(cl_device_id deviceID) +template +std::string +CBasicTest::PragmaHeader(cl_device_id deviceID) { - std::string pragma; - - if(gOldAPI) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"; - } - // Create the pragma lines for this kernel - if(DataType().Size(deviceID) == 8) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; - } - if(_dataType == TYPE_ATOMIC_DOUBLE) - pragma += "#pragma OPENCL 
EXTENSION cl_khr_fp64 : enable\n"; - return pragma; + std::string pragma; + + if (gOldAPI) + { + pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_local_int32_extended_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_global_int32_extended_atomics : enable\n"; + } + // Create the pragma lines for this kernel + if (DataType().Size(deviceID) == 8) + { + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; + } + if (_dataType == TYPE_ATOMIC_DOUBLE) + pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + return pragma; } -template -std::string CBasicTest::ProgramHeader(cl_uint maxNumDestItems) +template +std::string +CBasicTest::ProgramHeader(cl_uint maxNumDestItems) { - // Create the program header - std::string header; - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string argListForKernel; - std::string argListForFunction; - std::string argListNoTypes; - std::string functionPrototype; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - - if(gOldAPI) - { - header += std::string("#define ")+aTypeName+" "+cTypeName+"\n" - "#define atomic_store(x,y) (*(x) = y)\n" - "#define atomic_load(x) (*(x))\n" - "#define ATOMIC_VAR_INIT(x) (x)\n" - "#define ATOMIC_FLAG_INIT 0\n" - "#define atomic_init(x,y) atomic_store(x,y)\n"; - if(aTypeName == "atomic_float") - header += "#define atomic_exchange(x,y) atomic_xchg(x,y)\n"; - else if(aTypeName == "atomic_double") - header += "double atomic_exchange(volatile "+addressSpace+"atomic_double *x, double y)\n" - "{\n" - " long tmp = *(long*)&y, res;\n" - " volatile "+addressSpace+"long *tmpA = (volatile "+addressSpace+"long)x;\n" - " res = atom_xchg(tmpA,tmp);\n" - " return *(double*)&res;\n" - "}\n"; - else - header += "#define atomic_exchange(x,y) atom_xchg(x,y)\n"; - if(aTypeName != "atomic_float" && aTypeName != "atomic_double") - header += - "bool atomic_compare_exchange_strong(volatile "+addressSpace+" "+aTypeName+" *a, "+cTypeName+" *expected, "+cTypeName+" desired)\n" - "{\n" - " "+cTypeName+" old = atom_cmpxchg(a, *expected, desired);\n" - " if(old == *expected)\n" - " return true;\n" - " *expected = old;\n" - " return false;\n" - "}\n" - "#define atomic_compare_exchange_weak atomic_compare_exchange_strong\n"; - header += - "#define atomic_fetch_add(x,y) atom_add(x,y)\n" - "#define atomic_fetch_sub(x,y) atom_sub(x,y)\n" - "#define atomic_fetch_or(x,y) atom_or(x,y)\n" - "#define atomic_fetch_xor(x,y) atom_xor(x,y)\n" - "#define atomic_fetch_and(x,y) atom_and(x,y)\n" - "#define atomic_fetch_min(x,y) atom_min(x,y)\n" - "#define atomic_fetch_max(x,y) atom_max(x,y)\n" - "#define atomic_flag_test_and_set(x) atomic_exchange(x,1)\n" - "#define atomic_flag_clear(x) atomic_store(x,0)\n" - "\n"; - } - if(!LocalMemory() && DeclaredInProgram()) - { - // additional atomic variable for results copying (last thread will do this) - header += "__global volatile atomic_uint finishedThreads = ATOMIC_VAR_INIT(0);\n"; - // atomic variables declared in 
program scope - test data - std::stringstream ss; - ss << maxNumDestItems; - header += std::string("__global volatile ")+aTypeName+" destMemory["+ss.str()+"] = {\n"; - ss.str(""); - ss << _startValue; - for(cl_uint i = 0; i < maxNumDestItems; i++) - { - if(aTypeName == "atomic_flag") - header += " ATOMIC_FLAG_INIT"; - else - header += " ATOMIC_VAR_INIT("+ss.str()+")"; - if(i+1 < maxNumDestItems) - header += ","; - header += "\n"; - } - header+= - "};\n" - "\n"; - } - return header; + // Create the program header + std::string header; + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string argListForKernel; + std::string argListForFunction; + std::string argListNoTypes; + std::string functionPrototype; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + + if (gOldAPI) + { + header += std::string("#define ") + aTypeName + " " + cTypeName + + "\n" + "#define atomic_store(x,y) (*(x) " + "= y)\n" + "#define atomic_load(x) " + "(*(x))\n" + "#define ATOMIC_VAR_INIT(x) (x)\n" + "#define ATOMIC_FLAG_INIT 0\n" + "#define atomic_init(x,y) " + "atomic_store(x,y)\n"; + if (aTypeName == "atomic_float") + header += "#define atomic_exchange(x,y) " + " atomic_xchg(x,y)\n"; + else if (aTypeName == "atomic_double") + header += "double atomic_exchange(volatile " + addressSpace + + "atomic_double *x, double y)\n" + "{\n" + " long tmp = *(long*)&y, res;\n" + " volatile " + + addressSpace + "long *tmpA = (volatile " + addressSpace + + "long)x;\n" + " res = atom_xchg(tmpA,tmp);\n" + " return *(double*)&res;\n" + "}\n"; + else + header += "#define atomic_exchange(x,y) " + " atom_xchg(x,y)\n"; + if (aTypeName != "atomic_float" && aTypeName != "atomic_double") + header += "bool atomic_compare_exchange_strong(volatile " + + addressSpace + " " + aTypeName + " *a, " + cTypeName + + " *expected, " + cTypeName + + " desired)\n" + "{\n" + " " + + cTypeName + + " old = atom_cmpxchg(a, *expected, desired);\n" + 
" if(old == *expected)\n" + " return true;\n" + " *expected = old;\n" + " return false;\n" + "}\n" + "#define atomic_compare_exchange_weak " + "atomic_compare_exchange_strong\n"; + header += "#define atomic_fetch_add(x,y) " + "atom_add(x,y)\n" + "#define atomic_fetch_sub(x,y) " + "atom_sub(x,y)\n" + "#define atomic_fetch_or(x,y) " + "atom_or(x,y)\n" + "#define atomic_fetch_xor(x,y) " + "atom_xor(x,y)\n" + "#define atomic_fetch_and(x,y) " + "atom_and(x,y)\n" + "#define atomic_fetch_min(x,y) " + "atom_min(x,y)\n" + "#define atomic_fetch_max(x,y) " + "atom_max(x,y)\n" + "#define atomic_flag_test_and_set(x) " + "atomic_exchange(x,1)\n" + "#define atomic_flag_clear(x) " + "atomic_store(x,0)\n" + "\n"; + } + if (!LocalMemory() && DeclaredInProgram()) + { + // additional atomic variable for results copying (last thread will do + // this) + header += "__global volatile atomic_uint finishedThreads = " + "ATOMIC_VAR_INIT(0);\n"; + // atomic variables declared in program scope - test data + std::stringstream ss; + ss << maxNumDestItems; + header += std::string("__global volatile ") + aTypeName + " destMemory[" + + ss.str() + "] = {\n"; + ss.str(""); + ss << _startValue; + for (cl_uint i = 0; i < maxNumDestItems; i++) + { + if (aTypeName == "atomic_flag") + header += " ATOMIC_FLAG_INIT"; + else + header += " ATOMIC_VAR_INIT(" + ss.str() + ")"; + if (i + 1 < maxNumDestItems) header += ","; + header += "\n"; + } + header += "};\n" + "\n"; + } + return header; } -template +template std::string CBasicTest::FunctionCode() { - if(!UsedInFunction()) - return ""; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - std::string code = "void test_atomic_function(uint tid, uint threadCount, uint numDestItems, volatile "; - if(!GenericAddrSpace()) - code += addressSpace; - code += std::string(DataType().AtomicTypeName())+" *destMemory, __global "+DataType().RegularTypeName()+ - " *oldValues"; - if(LocalRefValues()) - code += std::string(", __local ")+DataType().RegularTypeName()+" *localValues"; - code += ")\n" - "{\n"; - code += ProgramCore(); - code += "}\n" - "\n"; - return code; + if (!UsedInFunction()) return ""; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "void test_atomic_function(uint tid, uint threadCount, " + "uint numDestItems, volatile "; + if (!GenericAddrSpace()) code += addressSpace; + code += std::string(DataType().AtomicTypeName()) + " *destMemory, __global " + + DataType().RegularTypeName() + " *oldValues"; + if (LocalRefValues()) + code += std::string(", __local ") + DataType().RegularTypeName() + + " *localValues"; + code += ")\n" + "{\n"; + code += ProgramCore(); + code += "}\n" + "\n"; + return code; } -template -std::string CBasicTest::KernelCode(cl_uint maxNumDestItems) +template +std::string +CBasicTest::KernelCode(cl_uint maxNumDestItems) { - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string addressSpace = LocalMemory() ? "__local " : "__global "; - std::string code = "__kernel void test_atomic_kernel(uint threadCount, uint numDestItems, "; - - // prepare list of arguments for kernel - if(LocalMemory()) - { - code += std::string("__global ")+cTypeName+" *finalDest, __global "+cTypeName+" *oldValues," - " volatile "+addressSpace+aTypeName+" *"+(DeclaredInProgram() ? "notUsed" : "")+"destMemory"; - } - else - { - code += "volatile "+addressSpace+(DeclaredInProgram() ? 
(cTypeName+" *finalDest") : (aTypeName+" *destMemory"))+ - ", __global "+cTypeName+" *oldValues"; - } - if(LocalRefValues()) - code += std::string(", __local ")+cTypeName+" *localValues"; - code += ")\n" - "{\n"; - if(LocalMemory() && DeclaredInProgram()) - { - // local atomics declared in kernel scope - std::stringstream ss; - ss << maxNumDestItems; - code += std::string(" __local volatile ")+aTypeName+" destMemory["+ss.str()+"];\n"; - } - code += " uint tid = get_global_id(0);\n" - "\n"; - if(LocalMemory()) - { - // memory_order_relaxed is sufficient for these initialization operations - // as the barrier below will act as a fence, providing an order to the - // operations. memory_scope_work_group is sufficient as local memory is - // only visible within the work-group. - code += R"( + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "__kernel void test_atomic_kernel(uint threadCount, " + "uint numDestItems, "; + + // prepare list of arguments for kernel + if (LocalMemory()) + { + code += std::string("__global ") + cTypeName + " *finalDest, __global " + + cTypeName + + " *oldValues," + " volatile " + + addressSpace + aTypeName + " *" + + (DeclaredInProgram() ? "notUsed" : "") + "destMemory"; + } + else + { + code += "volatile " + addressSpace + + (DeclaredInProgram() ? 
(cTypeName + " *finalDest") + : (aTypeName + " *destMemory")) + + ", __global " + cTypeName + " *oldValues"; + } + if (LocalRefValues()) + code += std::string(", __local ") + cTypeName + " *localValues"; + code += ")\n" + "{\n"; + if (LocalMemory() && DeclaredInProgram()) + { + // local atomics declared in kernel scope + std::stringstream ss; + ss << maxNumDestItems; + code += std::string(" __local volatile ") + aTypeName + " destMemory[" + + ss.str() + "];\n"; + } + code += " uint tid = get_global_id(0);\n" + "\n"; + if (LocalMemory()) + { + // memory_order_relaxed is sufficient for these initialization + // operations as the barrier below will act as a fence, providing an + // order to the operations. memory_scope_work_group is sufficient as + // local memory is only visible within the work-group. + code += R"( // initialize atomics not reachable from host (first thread // is doing this, other threads are waiting on barrier) if(get_local_id(0) == 0) for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++) {)"; - if (aTypeName == "atomic_flag") - { - code += R"( + if (aTypeName == "atomic_flag") + { + code += R"( if(finalDest[dstItemIdx]) atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, @@ -823,512 +976,565 @@ std::string CBasicTest::KernelCode(cl_uint maxNumD atomic_flag_clear_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( atomic_store_explicit(destMemory+dstItemIdx, finalDest[dstItemIdx], memory_order_relaxed, memory_scope_work_group);)"; + } + code += " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; } - code += - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (LocalRefValues()) - { - code += - " // Copy input reference values into local memory\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " localValues[get_local_id(0)] = oldValues[tid];\n"; - else + if (LocalRefValues()) { - std::stringstream ss; - ss 
<< NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " localValues[get_local_id(0)*" + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; - } - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (UsedInFunction()) - code += std::string(" test_atomic_function(tid, threadCount, numDestItems, destMemory, oldValues")+ - (LocalRefValues() ? ", localValues" : "")+");\n"; - else - code += ProgramCore(); - code += "\n"; - if (LocalRefValues()) - { - code += - " // Copy local reference values into output array\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + code += " // Copy input reference values into local memory\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " localValues[get_local_id(0)] = oldValues[tid];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " localValues[get_local_id(0)*" + + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; + } + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; + } + if (UsedInFunction()) + code += std::string(" test_atomic_function(tid, threadCount, " + "numDestItems, destMemory, oldValues") + + (LocalRefValues() ? 
", localValues" : "") + ");\n"; else + code += ProgramCore(); + code += "\n"; + if (LocalRefValues()) { - std::stringstream ss; - ss << NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " oldValues[tid*" + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + "+rfId];\n"; + code += " // Copy local reference values into output array\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " oldValues[tid*" + + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + + "+rfId];\n"; + } + code += "\n"; } - code += "\n"; - } - if(LocalMemory() || DeclaredInProgram()) - { - code += " // Copy final values to host reachable buffer\n"; - if(LocalMemory()) - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; - else - // global atomics declared in program scope - code += R"( + if (LocalMemory() || DeclaredInProgram()) + { + code += " // Copy final values to host reachable buffer\n"; + if (LocalMemory()) + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + else + // global atomics declared in program scope + code += R"( if(atomic_fetch_add_explicit(&finishedThreads, 1u, memory_order_relaxed, memory_scope_work_group) == get_global_size(0)-1) // last finished thread )"; - code += - " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"; - if(aTypeName == "atomic_flag") - { - code += R"( + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( finalDest[dstItemIdx] = atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, 
memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( finalDest[dstItemIdx] = atomic_load_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; + } } - } - code += "}\n" - "\n"; - return code; + code += "}\n" + "\n"; + return code; } template -int CBasicTest::ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) +int CBasicTest::ExecuteSingleTest( + cl_device_id deviceID, cl_context context, cl_command_queue queue) { - int error; - clProgramWrapper program; - clKernelWrapper kernel; - size_t threadNum[1]; - clMemWrapper streams[2]; - std::vector destItems; - HostAtomicType *svmAtomicBuffer = 0; - std::vector refValues, startRefValues; - HostDataType *svmDataBuffer = 0; - cl_uint deviceThreadCount, hostThreadCount, threadCount; - size_t groupSize = 0; - std::string programSource; - const char *programLine; - MTdata d; - size_t typeSize = DataType().Size(deviceID); - - deviceThreadCount = _maxDeviceThreads; - hostThreadCount = MaxHostThreads(); - threadCount = deviceThreadCount+hostThreadCount; - - //log_info("\t%s %s%s...\n", local ? 
"local" : "global", DataType().AtomicTypeName(), memoryOrderScope.c_str()); - log_info("\t%s...\n", SingleTestName().c_str()); - - if(!LocalMemory() && DeclaredInProgram() && gNoGlobalVariables) // no support for program scope global variables - { - log_info("\t\tTest disabled\n"); - return 0; - } - if(UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) - { - log_info("\t\tTest disabled\n"); - return 0; - } - - // set up work sizes based on device capabilities and test configuration - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL); - test_error(error, "Unable to obtain max work group size for device"); - CurrentGroupSize((cl_uint)groupSize); - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - - // If we're given a num_results function, we need to determine how many result objects we need. 
- // This is the first assessment for current maximum number of threads (exact thread count is not known here) - // - needed for program source code generation (arrays of atomics declared in program) - cl_uint numDestItems = NumResults(threadCount, deviceID); - - if(deviceThreadCount > 0) - { - // This loop iteratively reduces the workgroup size by 2 and then - // re-generates the kernel with the reduced - // workgroup size until we find a size which is admissible for the kernel - // being run or reduce the wg size - // to the trivial case of 1 (which was separately verified to be accurate - // for the kernel being run) - - while ((CurrentGroupSize() > 1)) - { - // Re-generate the kernel code with the current group size - if (kernel) clReleaseKernel(kernel); - if (program) clReleaseProgram(program); - programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) - + FunctionCode() + KernelCode(numDestItems); - programLine = programSource.c_str(); - if (create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &programLine, - "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) - { - return -1; - } - // Get work group size for the new kernel - error = clGetKernelWorkGroupInfo(kernel, deviceID, - CL_KERNEL_WORK_GROUP_SIZE, - sizeof(groupSize), &groupSize, NULL); - test_error(error, - "Unable to obtain max work group size for device and " - "kernel combo"); - - if (LocalMemory()) - { - cl_ulong usedLocalMemory; - cl_ulong totalLocalMemory; - cl_uint maxWorkGroupSize; - - error = clGetKernelWorkGroupInfo( - kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(usedLocalMemory), &usedLocalMemory, NULL); - test_error(error, "clGetKernelWorkGroupInfo failed"); - - error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, - sizeof(totalLocalMemory), - &totalLocalMemory, NULL); - test_error(error, "clGetDeviceInfo failed"); - - // We know that each work-group is going to use typeSize * - // deviceThreadCount bytes of local memory - // so pick the maximum value for deviceThreadCount that uses all - // the local memory. - maxWorkGroupSize = - ((totalLocalMemory - usedLocalMemory) / typeSize); - - if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; - } - if (CurrentGroupSize() <= groupSize) - break; - else - CurrentGroupSize(CurrentGroupSize() / 2); - } - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - } - if (gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - if(deviceThreadCount > 0) - log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize()); - if(hostThreadCount > 0) - log_info("\t\t(host threads %u)\n", hostThreadCount); - - refValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - - // Generate ref data if we have a ref generator provided - d = init_genrand(gRandomSeed); - 
startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - if(GenerateRefs(threadCount, &startRefValues[0], d)) - { - //copy ref values for host threads - memcpy(&refValues[0], &startRefValues[0], sizeof(HostDataType)*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - startRefValues.resize(0); - } - free_mtdata(d); - d = NULL; - - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - // This is final value (exact thread count is known in this place) - numDestItems = NumResults(threadCount, deviceID); - - destItems.resize(numDestItems); - for(cl_uint i = 0; i < numDestItems; i++) - destItems[i] = _startValue; - - // Create main buffer with atomic variables (array size dependent on particular test) - if(UseSVM()) - { - if(gUseHostPtr) - svmAtomicBuffer = (HostAtomicType*)malloc(typeSize * numDestItems); - else - svmAtomicBuffer = (HostAtomicType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, typeSize * numDestItems, 0); - if(!svmAtomicBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); - streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * numDestItems, svmAtomicBuffer, NULL); - } - else - { - streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - typeSize * numDestItems, &destItems[0], NULL); - } - if (!streams[0]) - { - log_error("ERROR: Creating output array failed!\n"); - return -1; - } - // Create buffer for per-thread input/output data - if(UseSVM()) - { - if(gUseHostPtr) - svmDataBuffer = (HostDataType*)malloc(typeSize*threadCount*NumNonAtomicVariablesPerThread()); - else - svmDataBuffer = (HostDataType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | (SVMDataBufferAllSVMConsistent() ? 
CL_MEM_SVM_ATOMICS : 0), typeSize*threadCount*NumNonAtomicVariablesPerThread(), 0); - if(!svmDataBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - if(startRefValues.size()) - memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread()); - streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * threadCount - * NumNonAtomicVariablesPerThread(), - svmDataBuffer, NULL); - } - else - { - streams[1] = clCreateBuffer( - context, - ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)), - typeSize * threadCount * NumNonAtomicVariablesPerThread(), - startRefValues.size() ? &startRefValues[0] : 0, NULL); - } - if (!streams[1]) - { - log_error("ERROR: Creating reference array failed!\n"); - return -1; - } - if(deviceThreadCount > 0) - { - cl_uint argInd = 0; - /* Set the arguments */ - error = clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); - test_error(error, "Unable to set kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), &numDestItems); - test_error(error, "Unable to set indexed kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); - test_error(error, "Unable to set indexed kernel arguments"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); - test_error(error, "Unable to set indexed kernel arguments"); - if(LocalMemory()) - { - error = clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); - test_error(error, "Unable to set indexed local kernel argument"); - } - if(LocalRefValues()) - { - error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize*CurrentGroupSize()*NumNonAtomicVariablesPerThread() : 1, NULL); - test_error(error, "Unable to set indexed kernel argument"); - } - } - /* Configure host threads */ - std::vector hostThreadContexts(hostThreadCount); - for(unsigned int t = 0; t < hostThreadCount; t++) - { - hostThreadContexts[t].test = this; - hostThreadContexts[t].tid = deviceThreadCount+t; - hostThreadContexts[t].threadCount = threadCount; - hostThreadContexts[t].destMemory = UseSVM() ? svmAtomicBuffer : &destItems[0]; - hostThreadContexts[t].oldValues = UseSVM() ? svmDataBuffer : &refValues[0]; - } - - if(deviceThreadCount > 0) - { - /* Run the kernel */ - threadNum[0] = deviceThreadCount; - groupSize = CurrentGroupSize(); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, &groupSize, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - /* start device threads */ - error = clFlush(queue); - test_error(error, "clFlush failed"); - } - - /* Start host threads and wait for finish */ - if(hostThreadCount > 0) - ThreadPool_Do(HostThreadFunction, hostThreadCount, &hostThreadContexts[0]); - - if(UseSVM()) - { - error = clFinish(queue); - test_error(error, "clFinish failed"); - memcpy(&destItems[0], svmAtomicBuffer, typeSize*numDestItems); - memcpy(&refValues[0], svmDataBuffer, typeSize*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - if(deviceThreadCount > 0) - { - error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to read result value!"); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize * deviceThreadCount*NumNonAtomicVariablesPerThread(), &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); - } - } - bool dataVerified = false; - // If we have an expectedFn, then we need to generate a final value to compare against. 
If we don't - // have one, it's because we're comparing ref values only - for(cl_uint i = 0; i < numDestItems; i++) - { - HostDataType expected; - - if(!ExpectedValue(expected, threadCount, startRefValues.size() ? &startRefValues[0] : 0, i)) - break; // no expected value function provided - - if(expected != destItems[i]) - { - std::stringstream logLine; - logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected << ", was " << destItems[i] << ")\n"; - log_error("%s", logLine.str().c_str()); - for(i = 0; i < threadCount; i++) - { - logLine.str(""); - logLine << " --- " << i << " - "; - if(startRefValues.size()) - logLine << startRefValues[i] << " -> " << refValues[i]; - else - logLine << refValues[i]; - logLine << " --- "; - if(i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - dataVerified = true; - } - - bool dataCorrect = false; - /* Use the verify function (if provided) to also check the results */ - if(VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) - { - if(!dataCorrect) - { - log_error("ERROR: Reference values did not validate!\n"); - std::stringstream logLine; - for(cl_uint i = 0; i < threadCount; i++) - for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) - { - logLine.str(""); - logLine << " --- " << i << " - " << refValues[i*NumNonAtomicVariablesPerThread()+j] << " --- "; - if(j == 0 && i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { + int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threadNum[1]; + clMemWrapper streams[2]; + std::vector destItems; + HostAtomicType *svmAtomicBuffer = 0; + std::vector refValues, startRefValues; + HostDataType *svmDataBuffer = 0; + cl_uint deviceThreadCount, hostThreadCount, threadCount; + 
size_t groupSize = 0; + std::string programSource; + const char *programLine; + MTdata d; + size_t typeSize = DataType().Size(deviceID); + + deviceThreadCount = _maxDeviceThreads; + hostThreadCount = MaxHostThreads(); + threadCount = deviceThreadCount + hostThreadCount; + + // log_info("\t%s %s%s...\n", local ? "local" : "global", + // DataType().AtomicTypeName(), memoryOrderScope.c_str()); + log_info("\t%s...\n", SingleTestName().c_str()); + + if (!LocalMemory() && DeclaredInProgram() + && gNoGlobalVariables) // no support for program scope global variables + { + log_info("\t\tTest disabled\n"); + return 0; + } + if (UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) + { + log_info("\t\tTest disabled\n"); + return 0; + } + + // set up work sizes based on device capabilities and test configuration + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(groupSize), &groupSize, NULL); + test_error(error, "Unable to obtain max work group size for device"); + CurrentGroupSize((cl_uint)groupSize); + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
This is the first assessment for current maximum + // number of threads (exact thread count is not known here) + // - needed for program source code generation (arrays of atomics declared + // in program) + cl_uint numDestItems = NumResults(threadCount, deviceID); + + if (deviceThreadCount > 0) + { + // This loop iteratively reduces the workgroup size by 2 and then + // re-generates the kernel with the reduced + // workgroup size until we find a size which is admissible for the + // kernel being run or reduce the wg size to the trivial case of 1 + // (which was separately verified to be accurate for the kernel being + // run) + + while ((CurrentGroupSize() > 1)) + { + // Re-generate the kernel code with the current group size + if (kernel) clReleaseKernel(kernel); + if (program) clReleaseProgram(program); + programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) + + FunctionCode() + KernelCode(numDestItems); + programLine = programSource.c_str(); + if (create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &programLine, + "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) + { + return -1; + } + // Get work group size for the new kernel + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), + &groupSize, NULL); + test_error(error, + "Unable to obtain max work group size for device and " + "kernel combo"); + + if (LocalMemory()) + { + cl_ulong usedLocalMemory; + cl_ulong totalLocalMemory; + cl_uint maxWorkGroupSize; + + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(usedLocalMemory), &usedLocalMemory, NULL); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(totalLocalMemory), + &totalLocalMemory, NULL); + test_error(error, "clGetDeviceInfo failed"); + + // We know that each work-group is going to use typeSize * + // deviceThreadCount bytes of local memory + // so pick the maximum value for deviceThreadCount that uses all + // the local memory. + maxWorkGroupSize = + ((totalLocalMemory - usedLocalMemory) / typeSize); + + if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; + } + if (CurrentGroupSize() <= groupSize) + break; + else + CurrentGroupSize(CurrentGroupSize() / 2); + } + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + } + if (gDebug) + { log_info("Program source:\n"); log_info("%s\n", programLine); - } - return -1; - } - } - else if(!dataVerified) - { - log_error("ERROR: Test doesn't check total or refs; no values are verified!\n"); - return -1; - } - - if(OldValueCheck() && - !(DeclaredInProgram() && !LocalMemory())) // don't test for programs scope global atomics - // 'old' value has been overwritten by previous clEnqueueNDRangeKernel - { - /* Re-write the starting value */ - for(size_t i = 0; i < 
numDestItems; i++) - destItems[i] = _startValue; - refValues[0] = 0; - if(deviceThreadCount > 0) - { - error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to write starting values!"); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ - threadNum[0] = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, threadNum, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); + } + if (deviceThreadCount > 0) + log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, + CurrentGroupSize()); + if (hostThreadCount > 0) + log_info("\t\t(host threads %u)\n", hostThreadCount); + + refValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + + // Generate ref data if we have a ref generator provided + d = init_genrand(gRandomSeed); + startRefValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + if (GenerateRefs(threadCount, &startRefValues[0], d)) + { + // copy ref values for host threads + memcpy(&refValues[0], &startRefValues[0], + sizeof(HostDataType) * threadCount + * NumNonAtomicVariablesPerThread()); } else { - /* Start host thread */ - HostFunction(0, 1, &destItems[0], &refValues[0]); + startRefValues.resize(0); } + free_mtdata(d); + d = NULL; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
If we don't have it, we assume it's just 1 This + // is final value (exact thread count is known in this place) + numDestItems = NumResults(threadCount, deviceID); - if(refValues[0] != _startValue)//destItems[0]) + destItems.resize(numDestItems); + for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue; + + // Create main buffer with atomic variables (array size dependent on + // particular test) + if (UseSVM()) { - std::stringstream logLine; - logLine << "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been " << destItems[0] << ", returned " << refValues[0] << ")!\n"; - log_error("%s", logLine.str().c_str()); - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - } - if(UseSVM()) - { - // the buffer object must first be released before the SVM buffer is freed - error = clReleaseMemObject(streams[0]); - streams[0] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmAtomicBuffer); + if (gUseHostPtr) + svmAtomicBuffer = (HostAtomicType *)malloc(typeSize * numDestItems); + else + svmAtomicBuffer = (HostAtomicType *)clSVMAlloc( + context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + typeSize * numDestItems, 0); + if (!svmAtomicBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); + streams[0] = + clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * numDestItems, svmAtomicBuffer, NULL); + } + else + { + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + typeSize * numDestItems, &destItems[0], NULL); + } + if (!streams[0]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + // Create buffer for per-thread input/output data + if (UseSVM()) + { + if (gUseHostPtr) + svmDataBuffer = (HostDataType *)malloc( + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + else + svmDataBuffer = 
(HostDataType *)clSVMAlloc( + context, + CL_MEM_SVM_FINE_GRAIN_BUFFER + | (SVMDataBufferAllSVMConsistent() ? CL_MEM_SVM_ATOMICS + : 0), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), 0); + if (!svmDataBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + if (startRefValues.size()) + memcpy(svmDataBuffer, &startRefValues[0], + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * threadCount + * NumNonAtomicVariablesPerThread(), + svmDataBuffer, NULL); + } else - clSVMFree(context, svmAtomicBuffer); - error = clReleaseMemObject(streams[1]); - streams[1] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmDataBuffer); + { + streams[1] = clCreateBuffer( + context, + ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR + : CL_MEM_READ_WRITE)), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), + startRefValues.size() ? &startRefValues[0] : 0, NULL); + } + if (!streams[1]) + { + log_error("ERROR: Creating reference array failed!\n"); + return -1; + } + if (deviceThreadCount > 0) + { + cl_uint argInd = 0; + /* Set the arguments */ + error = + clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), + &numDestItems); + test_error(error, "Unable to set indexed kernel argument"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + if (LocalMemory()) + { + error = + clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); + } + if (LocalRefValues()) + { + error = + clSetKernelArg(kernel, 
argInd++, + LocalRefValues() ? typeSize * CurrentGroupSize() + * NumNonAtomicVariablesPerThread() + : 1, + NULL); + test_error(error, "Unable to set indexed kernel argument"); + } + } + /* Configure host threads */ + std::vector hostThreadContexts(hostThreadCount); + for (unsigned int t = 0; t < hostThreadCount; t++) + { + hostThreadContexts[t].test = this; + hostThreadContexts[t].tid = deviceThreadCount + t; + hostThreadContexts[t].threadCount = threadCount; + hostThreadContexts[t].destMemory = + UseSVM() ? svmAtomicBuffer : &destItems[0]; + hostThreadContexts[t].oldValues = + UseSVM() ? svmDataBuffer : &refValues[0]; + } + + if (deviceThreadCount > 0) + { + /* Run the kernel */ + threadNum[0] = deviceThreadCount; + groupSize = CurrentGroupSize(); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + &groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + /* start device threads */ + error = clFlush(queue); + test_error(error, "clFlush failed"); + } + + /* Start host threads and wait for finish */ + if (hostThreadCount > 0) + ThreadPool_Do(HostThreadFunction, hostThreadCount, + &hostThreadContexts[0]); + + if (UseSVM()) + { + error = clFinish(queue); + test_error(error, "clFinish failed"); + memcpy(&destItems[0], svmAtomicBuffer, typeSize * numDestItems); + memcpy(&refValues[0], svmDataBuffer, + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + } else - clSVMFree(context, svmDataBuffer); - } - _passCount++; - return 0; + { + if (deviceThreadCount > 0) + { + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + typeSize * deviceThreadCount + * NumNonAtomicVariablesPerThread(), + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + } + bool dataVerified = false; + // If we have an expectedFn, 
then we need to generate a final value to + // compare against. If we don't have one, it's because we're comparing ref + // values only + for (cl_uint i = 0; i < numDestItems; i++) + { + HostDataType expected; + + if (!ExpectedValue(expected, threadCount, + startRefValues.size() ? &startRefValues[0] : 0, i)) + break; // no expected value function provided + + if (expected != destItems[i]) + { + std::stringstream logLine; + logLine << "ERROR: Result " << i + << " from kernel does not validate! (should be " << expected + << ", was " << destItems[i] << ")\n"; + log_error("%s", logLine.str().c_str()); + for (i = 0; i < threadCount; i++) + { + logLine.str(""); + logLine << " --- " << i << " - "; + if (startRefValues.size()) + logLine << startRefValues[i] << " -> " << refValues[i]; + else + logLine << refValues[i]; + logLine << " --- "; + if (i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + dataVerified = true; + } + + bool dataCorrect = false; + /* Use the verify function (if provided) to also check the results */ + if (VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) + { + if (!dataCorrect) + { + log_error("ERROR: Reference values did not validate!\n"); + std::stringstream logLine; + for (cl_uint i = 0; i < threadCount; i++) + for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) + { + logLine.str(""); + logLine + << " --- " << i << " - " + << refValues[i * NumNonAtomicVariablesPerThread() + j] + << " --- "; + if (j == 0 && i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + else if (!dataVerified) + { + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); + return -1; + } + + if 
(OldValueCheck() + && !(DeclaredInProgram() + && !LocalMemory())) // don't test for programs scope global atomics + // 'old' value has been overwritten by previous + // clEnqueueNDRangeKernel + { + /* Re-write the starting value */ + for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue; + refValues[0] = 0; + if (deviceThreadCount > 0) + { + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that + * the returned value is the original one */ + threadNum[0] = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + threadNum, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + else + { + /* Start host thread */ + HostFunction(0, 1, &destItems[0], &refValues[0]); + } + + if (refValues[0] != _startValue) // destItems[0]) + { + std::stringstream logLine; + logLine << "ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been " + << destItems[0] << ", returned " << refValues[0] << ")!\n"; + log_error("%s", logLine.str().c_str()); + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + if (UseSVM()) + { + // the buffer object must first be released before the SVM buffer is + // freed. 
The Wrapper Class method reset() will do that + streams[0].reset(); + if (gUseHostPtr) + free(svmAtomicBuffer); + else + clSVMFree(context, svmAtomicBuffer); + streams[1].reset(); + if (gUseHostPtr) + free(svmDataBuffer); + else + clSVMFree(context, svmDataBuffer); + } + _passCount++; + return 0; } #endif //_COMMON_H_ diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index c3a190b73b..38b4e9a788 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1657,12 +1657,18 @@ class CBasicTestFlag : public CBasicTestMemOrderScope -struct address_spaces_test : public detail::base_func_type -{ - // output buffer type - typedef T type; - - virtual ~address_spaces_test() {}; - // Returns test name - virtual std::string str() = 0; - // Returns OpenCL program source - virtual std::string generate_program() = 0; - // Returns kernel names IN ORDER - virtual std::vector get_kernel_names() - { - // Typical case, that is, only one kernel - return { this->get_kernel_name() }; - } - - // Return value that is expected to be in output_buffer[i] - virtual T operator()(size_t i, size_t work_group_size) = 0; - - // If local size has to be set in clEnqueueNDRangeKernel() - // this should return true; otherwise - false; - virtual bool set_local_size() - { - return false; - } - - // Calculates maximal work-group size (one dim) - virtual size_t get_max_local_size(const std::vector& kernels, - cl_device_id device, - size_t work_group_size, // default work-group size - cl_int& error) - { - size_t wg_size = work_group_size; - for(auto&k : kernels) - { - size_t max_wg_size; - error = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - wg_size = (std::min)(max_wg_size, wg_size); - } - return wg_size; - } - - // This covers typical case: each kernel is executed once, every kernel - 
// has only one argument which is output buffer - virtual cl_int execute(const std::vector& kernels, - cl_mem& output_buffer, - cl_command_queue& queue, - size_t work_size, - size_t work_group_size) - { - cl_int err; - for(auto& k : kernels) - { - err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel( - queue, k, 1, - NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - } - return err; - } -}; - -template -int run_address_spaces_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, address_spaces_test op) -{ - cl_mem buffers[1]; - cl_program program; - std::vector kernels; - size_t wg_size; - size_t work_size[1]; - cl_int err; - - typedef typename address_spaces_test::type TYPE; - - // Don't run test for unsupported types - if(!(type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = op.generate_program(); - std::vector kernel_names = op.get_kernel_names(); - if(kernel_names.empty()) - { - RETURN_ON_ERROR_MSG(-1, "No kernel to run"); - } - kernels.resize(kernel_names.size()); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < 
kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#else - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#endif - - // Find the max possible wg size for among all the kernels - wg_size = op.get_max_local_size(kernels, device, 1024, err); - RETURN_ON_ERROR(err); - - work_size[0] = count; - if(op.set_local_size()) - { - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - work_size[0] = wg_number * wg_size; - } - - // output on host - std::vector output = generate_output(work_size[0], 9999); - - // output buffer - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - // Execute test - err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size); - RETURN_ON_ERROR(err) - - err = clEnqueueReadBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - for(size_t i = 0; i < output.size(); i++) - { - TYPE v = op(i, wg_size); - if(!(are_equal(v, output[i], detail::make_value(0), op))) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s(%s) failed. 
Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), - format_value(v).c_str(), format_value(output[i]).c_str() - ); - } - } - log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - for(auto& k : kernels) - clReleaseKernel(k); - clReleaseProgram(program); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP diff --git a/test_conformance/clcpp/address_spaces/main.cpp b/test_conformance/clcpp/address_spaces/main.cpp deleted file mode 100644 index d618e17977..0000000000 --- a/test_conformance/clcpp/address_spaces/main.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_pointer_types.hpp" -#include "test_storage_types.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/address_spaces/test_pointer_types.hpp b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp deleted file mode 100644 index af228d0d94..0000000000 --- a/test_conformance/clcpp/address_spaces/test_pointer_types.hpp +++ /dev/null @@ -1,412 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP -#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP - -#include - -#include "common.hpp" - -// ---------------------------- -// ---------- PRIVATE -// ---------------------------- - -template -struct private_pointer_test : public address_spaces_test -{ - std::string str() - { - return "private_pointer"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - (void) work_group_size; - return detail::make_value(static_cast(i)); - } - - // Each work-item writes its global id to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(gid);\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = 
get_global_id(0);\n" - " typedef " + type_name() + " TYPE;\n" - " TYPE v = TYPE(gid);\n" - " private_ptr v_ptr1(dynamic_asptr_cast>(&v));\n" - " private_ptr v_ptr2(v_ptr1);\n" - " TYPE a[] = { TYPE(0), TYPE(1) };\n" - " private_ptr a_ptr = dynamic_asptr_cast>(a);\n" - " a_ptr++;\n" - " TYPE * a_ptr2 = a_ptr.get();\n" - " *a_ptr2 = *v_ptr2;\n" - " output[gid] = a[1];\n" - "}\n"; - #endif - } -}; - -AUTO_TEST_CASE(test_private_pointer) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // private pointer - RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- LOCAL -// ---------------------------- - -template -struct local_pointer_test : public address_spaces_test -{ - std::string str() - { - return "local_pointer"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - size_t r = i / work_group_size; - return detail::make_value(static_cast(r)); - } - - bool set_local_size() - { - return true; - } - - size_t get_max_local_size(const std::vector& kernels, - cl_device_id device, - size_t work_group_size, // default work-group size - cl_int& error) - { - // Set size of the local memory, we need to to this to correctly calculate - // max possible work-group size. - // Additionally this already set 2nd argument of the test kernel, so we don't - // have to modify execute() method. 
- error = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg"); - - size_t wg_size; - error = clGetKernelWorkGroupInfo( - kernels[0], device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - wg_size = wg_size <= work_group_size ? wg_size : work_group_size; - return wg_size; - } - - // Every work-item writes id of its work-group to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output, " - "local uint * local_mem_ptr)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(get_group_id(0));\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output, " - "local_ptr local_mem_ptr)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " size_t lid = get_local_id(0);\n" - " typedef " + type_name() + " TYPE;\n" - // 1st work-item in work-group writes get_group_id() to var - " local var;\n" - " local_ptr var_ptr = var.ptr();\n" - " if(lid == 0) { *var_ptr = get_group_id(0); }\n" - " work_group_barrier(mem_fence::local);\n" - // last work-item in work-group writes var to 1st element of local_mem - " local_ptr local_mem_ptr2(local_mem_ptr);\n" - " auto local_mem_ptr3 = local_mem_ptr2.release();\n" - " if(lid == (get_local_size(0) - 1)) { *(local_mem_ptr3) = var; }\n" - " work_group_barrier(mem_fence::local);\n" - // each work-item in work-group writes 
local_mem_ptr[0] to output[work-item-global-id] - " output[gid] = local_mem_ptr[0];\n" - "}\n"; - #endif - } -}; - -AUTO_TEST_CASE(test_local_pointer) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // local pointer - RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- GLOBAL -// ---------------------------- - -template -struct global_pointer_test : public address_spaces_test -{ - std::string str() - { - return "global_pointer"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - (void) work_group_size; - return detail::make_value(static_cast(i)); - } - - // Each work-item writes its global id to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(gid);\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "typedef " + type_name() + " TYPE;\n" - "void set_to_gid(global_ptr ptr)\n" - "{\n" - " *ptr = TYPE(get_global_id(0));" - "}\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " size_t 
gid = get_global_id(0);\n" - " auto ptr = output.get();\n" - " global_ptr ptr2(ptr);\n" - " ptr2 += ptrdiff_t(gid);\n" - " set_to_gid(ptr2);\n" - "}\n"; - #endif - } -}; - -AUTO_TEST_CASE(test_global_pointer) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // global pointer - RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- CONSTANT -// ---------------------------- - -template -struct constant_pointer_test : public address_spaces_test -{ - // m_test_value is just a random value we use in this test. - constant_pointer_test() : m_test_value(0xdeaddeadU) - { - - } - - std::string str() - { - return "constant_pointer"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - (void) work_group_size; - return detail::make_value(static_cast(m_test_value)); - } - - // Each work-item writes m_test_value to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output, " - "constant uint * const_ptr)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(const_ptr[0]);\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include 
\n" - "using namespace cl;\n" - "typedef " + type_name() + " TYPE;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output, " - "constant_ptr const_ptr)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " constant_ptr const_ptr2 = const_ptr;\n" - " auto const_ptr3 = const_ptr2.get();\n" - " output[gid] = *const_ptr3;\n" - "}\n"; - #endif - } - - // execute() method needs to be modified, to create additional buffer - // and set it in 2nd arg (constant_ptr const_ptr) - cl_int execute(const std::vector& kernels, - cl_mem& output_buffer, - cl_command_queue& queue, - size_t work_size, - size_t work_group_size) - { - cl_int err; - - // Get context from queue - cl_context context; - err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); - RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo"); - - // Create constant buffer - auto const_buff = clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(cl_uint), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - // Write m_test_value to const_buff - err = clEnqueueWriteBuffer( - queue, const_buff, CL_TRUE, 0, sizeof(cl_uint), - static_cast(&m_test_value), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernels[0], 1, sizeof(const_buff), &const_buff); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel( - queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? 
&work_group_size : NULL, 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clFinish(queue); - RETURN_ON_CL_ERROR(err, "clFinish"); - - err = clReleaseMemObject(const_buff); - RETURN_ON_CL_ERROR(err, "clReleaseMemObject"); - return err; - } - -private: - cl_uint m_test_value; -}; - -AUTO_TEST_CASE(test_constant_pointer) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // constant pointer - RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP diff --git a/test_conformance/clcpp/address_spaces/test_storage_types.hpp b/test_conformance/clcpp/address_spaces/test_storage_types.hpp deleted file mode 100644 index e47f9523e7..0000000000 --- a/test_conformance/clcpp/address_spaces/test_storage_types.hpp +++ /dev/null @@ -1,418 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP -#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP - -#include - -#include "common.hpp" - -// ---------------------------- -// ---------- PRIVATE -// ---------------------------- - -template -struct private_storage_test : public address_spaces_test -{ - std::string str() - { - return "private_storage"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - (void) work_group_size; - return detail::make_value(static_cast(i)); - } - - // Each work-item writes its global id to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(gid);\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " typedef " + type_name() + " TYPE;\n" - " priv v = { TYPE(gid) };\n" - " const TYPE *v_ptr1 = &v;\n" - " private_ptr v_ptr2 = v.ptr();\n" - " TYPE v2 = *v_ptr2;\n" - " priv> a;\n" - " *(a.begin()) = v2;\n" - " output[gid] = a[0];\n" - "}\n"; - #endif - } -}; - -AUTO_TEST_CASE(test_private_storage) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // private storage - RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); - 
RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- LOCAL -// ---------------------------- - -template -struct local_storage_test : public address_spaces_test -{ - std::string str() - { - return "local_storage"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - size_t r = i / work_group_size; - return detail::make_value(static_cast(r)); - } - - bool set_local_size() - { - return true; - } - - // Every work-item writes id of its work-group to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(get_group_id(0));\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - // Using program scope local variable - "local<" + type_name() + "> program_scope_var;" - "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " size_t lid = get_local_id(0);\n" - " typedef " + type_name() + " TYPE;\n" - // 1st work-item in work-group writes get_group_id() to var - " local var;\n" - " if(lid == 0) { var = TYPE(get_group_id(0)); }\n" - " work_group_barrier(mem_fence::local);\n" - // 
last work-item in work-group writes var to 1st element of a - " local_ptr var_ptr = var.ptr();\n" - " TYPE var2 = *var_ptr;\n" - " local> a;\n" - " if(lid == (get_local_size(0) - 1)) { *(a.begin()) = var2; }\n" - " work_group_barrier(mem_fence::local);\n" - // 1st work-item in work-group writes a[0] to program_scope_var - " if(lid == 0) { program_scope_var = a[0]; }\n" - " work_group_barrier(mem_fence::local);\n" - " const TYPE *program_scope_var_ptr = &program_scope_var;\n" - " output[gid] = *program_scope_var_ptr;\n" - "}\n"; - #endif - } -}; - -AUTO_TEST_CASE(test_local_storage) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // local storage - RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- GLOBAL -// ---------------------------- - -template -struct global_storage_test : public address_spaces_test -{ - // m_test_value is just a random value we use in this test. - // m_test_value should not be zero. 
- global_storage_test() : m_test_value(0xdeaddeadU) - { - - } - - std::string str() - { - return "global_storage"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - return detail::make_value(static_cast(m_test_value)); - } - - std::vector get_kernel_names() - { - return - { - this->get_kernel_name() + "1", - this->get_kernel_name() + "2" - }; - } - - // Every work-item writes m_test_value to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_names()[0] + "(global " + type_name() + " *output, " - "uint test_value)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(test_value);\n" - "}\n" - "__kernel void " + this->get_kernel_names()[1] + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = output[gid];\n" - "}\n"; - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "typedef " + type_name() + " TYPE;\n" - // Using program scope global variable - "global> program_scope_global_array;" - "__kernel void " + this->get_kernel_names()[0] + "(global_ptr<" + type_name() + "[]> output, " - "uint test_value)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - // 1st work-item writes test_value to program_scope_global_array[0] - " if(gid == 0) { program_scope_global_array[0] = test_value; }\n" - "}\n" - "__kernel void " + this->get_kernel_names()[1] + "(global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " static global func_scope_global_var { 0 };\n" - // if 
(func_scope_global_var == 1) is true then - // each work-item saves program_scope_global_array[0] to output[work-item-global-id] - " if(func_scope_global_var == uint(1))\n" - " {\n" - " output[gid] = program_scope_global_array[0];\n" - " return;\n" - " }\n" - // 1st work-item writes 1 to func_scope_global_var - " if(gid == 0) { func_scope_global_var = uint(1); }\n" - "}\n"; - #endif - } - - // In this test execution is quite complicated. We have two kernels. - // 1st kernel tests program scope global variable, and 2nd kernel tests - // function scope global variable (that's why it is run twice). - cl_int execute(const std::vector& kernels, - cl_mem& output_buffer, - cl_command_queue& queue, - size_t work_size, - size_t wg_size) - { - cl_int err; - err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernels[0], 1, sizeof(cl_uint), &m_test_value); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - // Run first kernel, once. - // This kernel saves m_test_value to program scope global variable called program_scope_global_var - err = clEnqueueNDRangeKernel( - queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - err = clFinish(queue); - RETURN_ON_CL_ERROR(err, "clFinish") - - err = clSetKernelArg(kernels[1], 0, sizeof(output_buffer), &output_buffer); - // Run 2nd kernel, twice. - // 1st run: program_scope_global_var is saved to function scope global array called func_scope_global_array - // 2nd run: each work-item saves func_scope_global_array[0] to ouput[work-item-global-id] - for(size_t i = 0; i < 2; i++) - { - err = clEnqueueNDRangeKernel( - queue, kernels[1], 1, NULL, &work_size, this->set_local_size() ? 
&wg_size : NULL, 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - err = clFinish(queue); - RETURN_ON_CL_ERROR(err, "clFinish") - } - return err; - } - -private: - cl_uint m_test_value; -}; - -AUTO_TEST_CASE(test_global_storage) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// ---------------------------- -// ---------- CONSTANT -// ---------------------------- - -template -struct constant_storage_test : public address_spaces_test -{ - // m_test_value is just a random value we use in this test. - constant_storage_test() : m_test_value(0xdeaddeadU) - { - - } - - std::string str() - { - return "constant_storage"; - } - - T operator()(size_t i, size_t work_group_size) - { - typedef typename scalar_type::type SCALAR; - return detail::make_value(static_cast(m_test_value)); - } - - // Every work-item writes m_test_value to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = (" + type_name() + ")(" + std::to_string(m_test_value) + ");\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - 
"using namespace cl;\n" - // Program scope constant variable, program_scope_var == (m_test_value - 1) - "constant program_scope_const{ (" + std::to_string(m_test_value) + " - 1) };" - "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " typedef " + type_name() + " TYPE;\n" - " static constant func_scope_const{ 1 };\n" - " constant_ptr ps_const_ptr = program_scope_const.ptr();\n" - // " constant_ptr> fs_const_ptr = &func_scope_const;\n" - " output[gid] = TYPE(*ps_const_ptr + func_scope_const);\n" - "}\n"; - #endif - } -private: - cl_uint m_test_value; -}; - -AUTO_TEST_CASE(test_constant_storage) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); - RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP diff --git a/test_conformance/clcpp/api/CMakeLists.txt b/test_conformance/clcpp/api/CMakeLists.txt deleted file mode 100644 index 30763d6f62..0000000000 --- a/test_conformance/clcpp/api/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_API) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/api/main.cpp b/test_conformance/clcpp/api/main.cpp deleted file mode 100644 index 7652838420..0000000000 --- a/test_conformance/clcpp/api/main.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_spec_consts.hpp" -#include "test_ctors_dtors.hpp" -#include "test_ctors.hpp" -#include "test_dtors.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/api/test_ctors.hpp b/test_conformance/clcpp/api/test_ctors.hpp deleted file mode 100644 index ae0695ca69..0000000000 --- a/test_conformance/clcpp/api/test_ctors.hpp +++ /dev/null @@ -1,487 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP -#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP - -#include -#include -#include -#include - -#include "../common.hpp" - -// TEST 1 -// Verify that constructors are executed before any kernel is executed. -// Verify that when present, multiple constructors are executed. The order between -// constructors is undefined, but they should all execute. - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * kernel_test_ctors_executed = - "__kernel void test_ctors_executed(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -const char * kernel_test_ctors_executed_multiple_ctors = - "__kernel void test_ctors_executed_multiple_ctors(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * kernel_test_ctors_executed = - "#include \n" - "#include \n" - "using namespace cl;\n" - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class(int y) { x = y;};\n" - " int x;\n" - "};\n" - // global scope program variable - "ctor_test_class global_var(int(0xbeefbeef));\n" - "__kernel void test_ctors_executed(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(global_var.x != int(0xbeefbeef)) result = 1;\n" - " output[gid] = result;\n" - "}\n" -; -const char * kernel_test_ctors_executed_multiple_ctors = - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "template\n" - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class(T y) { x = y;};\n" - " T x;\n" - "};\n" - // global scope program variables - "ctor_test_class 
global_var0(int(0xbeefbeef));\n" - "ctor_test_class global_var1(uint(0xbeefbeefU));\n" - "ctor_test_class global_var2(float(FLT_MAX));\n" - "__kernel void test_ctors_executed_multiple_ctors(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(global_var0.x != int(0xbeefbeef)) result = 1;\n" - " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n" - " if(global_var2.x != float(FLT_MAX)) result = 1;\n" - " output[gid] = result;\n" - "}\n" -; -#endif - -int test_ctors_execution(cl_device_id device, - cl_context context, - cl_command_queue queue, - int count, - std::string kernel_name, - const char * kernel_source) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name, "", false); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == count, output[0...count-1] == 1 - std::vector output(count, cl_uint(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = 
clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - error = -1; - CHECK_ERROR_MSG(error, "Test %s failed.", kernel_name.c_str()); - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_global_scope_ctors_executed) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = test_ctors_execution( - device, context, queue, count, - "test_ctors_executed", kernel_test_ctors_executed - ); - CHECK_ERROR(local_error); - error |= local_error; - - local_error = test_ctors_execution( - device, context, queue, count, - "test_ctors_executed_multiple_ctors", kernel_test_ctors_executed_multiple_ctors - ); - CHECK_ERROR(local_error); - error |= local_error; - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -// TEST 2 -// Verify that constructors are only executed once when multiple kernels from a program are executed. - -// How: The first kernel (test_ctors_executed_once_set) is run once. 
It changes values of program scope -// variables, then the second kernel is run multiple times, each time verifying that global variables -// have correct values (the second kernel should observe the values assigned by the first kernel, not -// by the constructors). - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * program_test_ctors_executed_once = - "__kernel void test_ctors_executed_once_set()\n" - "{\n" - "}\n" - "__kernel void test_ctors_executed_once_read(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * program_test_ctors_executed_once = - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct template - "template\n" - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class(T y) { x = y;};\n" - " T x;\n" - "};\n" - // global scope program variables - "ctor_test_class global_var0(int(0));\n" - "ctor_test_class global_var1(uint(0));\n" - - "__kernel void test_ctors_executed_once_set()\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " if(gid == 0) {\n" - " global_var0.x = int(0xbeefbeef);\n" - " global_var1.x = uint(0xbeefbeefU);\n" - " }\n" - "}\n\n" - - "__kernel void test_ctors_executed_once_read(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(global_var0.x != int(0xbeefbeef)) result = 1;\n" - " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n" - " output[gid] = result;\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_global_scope_ctors_executed_once) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel 
kernel_set_global_vars; - cl_kernel kernel_read_global_vars; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel_set_global_vars, - program_test_ctors_executed_once, "test_ctors_executed_once_set" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel_set_global_vars, - program_test_ctors_executed_once, "test_ctors_executed_once_set", "", false - ); - RETURN_ON_ERROR(error) - // Get the second kernel - kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error); - RETURN_ON_CL_ERROR(error, "clCreateKernel"); -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel_set_global_vars, - program_test_ctors_executed_once, "test_ctors_executed_once_set" - ); - RETURN_ON_ERROR(error) - // Get the second kernel - kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error); - RETURN_ON_CL_ERROR(error, "clCreateKernel"); -#endif - - // Execute kernel_set_global_vars - - work_size[0] = count; - error = clEnqueueNDRangeKernel(queue, kernel_set_global_vars, dim, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - // Execute kernel_read_global_vars 4 times, each time we check if - // global variables have correct values. 
- - // host vector, size == count, output[0...count-1] == 1 - std::vector output(count, cl_uint(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - for(size_t i = 0; i < 4; i++) - { - std::fill(output.begin(), output.end(), cl_uint(1)); - error = clEnqueueWriteBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel_read_global_vars, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel( - queue, kernel_read_global_vars, - dim, NULL, work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - error = -1; - CHECK_ERROR_MSG(error, "Test test_ctors_executed_onces failed."); - } - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel_set_global_vars); - clReleaseKernel(kernel_read_global_vars); - clReleaseProgram(program); - return error; -} - -// TEST3 -// Verify that when constructor is executed, the ND-range used is (1,1,1). 
- -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * program_test_ctors_ndrange = - "__kernel void test_ctors_ndrange(global int *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * program_test_ctors_ndrange = - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class() {\n" - " x = get_global_size(0);\n" - " y = get_global_size(1);\n" - " z = get_global_size(2);\n" - " };\n" - " ulong x;\n" - " ulong y;\n" - " ulong z;\n" - // return true if the ND-range used when ctor was exectured was - // (1, 1, 1); otherwise - false - " bool check() { return (x == 1) && (y == 1) && (z == 1);}" - "};\n" - // global scope program variables - "ctor_test_class global_var0;\n" - "ctor_test_class global_var1;\n" - - "__kernel void test_ctors_ndrange(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(!global_var0.check()) result = 1;\n" - " if(!global_var1.check()) result = 1;\n" - " output[gid] = result;\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_global_scope_ctors_ndrange) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) 
&& defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_ctors_ndrange, "test_ctors_ndrange" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_ctors_ndrange, "test_ctors_ndrange", "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - program_test_ctors_ndrange, "test_ctors_ndrange" - ); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == count, output[0...count-1] == 1 - std::vector output(count, cl_uint(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel( - queue, kernel, - dim, NULL, work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - error = -1; - CHECK_ERROR_MSG(error, "Test test_ctors_executed_ndrange failed."); - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP 
diff --git a/test_conformance/clcpp/api/test_ctors_dtors.hpp b/test_conformance/clcpp/api/test_ctors_dtors.hpp deleted file mode 100644 index 02838fa771..0000000000 --- a/test_conformance/clcpp/api/test_ctors_dtors.hpp +++ /dev/null @@ -1,185 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP -#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP - -#include -#include -#include - -#include "../common.hpp" - -// Verify queries clGetProgramInfo correctly return the presence of constructors and/or destructors -// in the program (using option CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT) -// (both are present, either one is present, none is present). 
- -std::string generate_ctor_dtor_program(const bool ctor, const bool dtor) -{ - std::string program; - if(ctor) - { - program += - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class(int y) { x = y;};\n" - " int x;\n" - "};\n" - "ctor_test_class ctor = ctor_test_class(1024);\n" - ; - } - if(dtor) - { - program += - "struct dtor_test_class {\n" - // non-trivial dtor - " ~dtor_test_class() { x = -1024; };\n" - " int x;\n" - "};\n" - "dtor_test_class dtor;\n" - ; - } - program += "__kernel void test_ctor_dtor()\n {\n }\n"; - return program; -} - -int test_get_program_info_global_ctors_dtors_present(cl_device_id device, - cl_context context, - cl_command_queue queue, - const bool ctor, - const bool dtor) -{ - int error = CL_SUCCESS; - cl_program program; - - // program source and options - std::string options = ""; - std::string source = generate_ctor_dtor_program(ctor, dtor); - const char * source_ptr = source.c_str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - // Create program - error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str()); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return CL_SUCCESS; -// Normal run -#else - // Create program - error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str()); - RETURN_ON_ERROR(error) -#endif - - // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool - // This indicates that the program object contains non-trivial constructor(s) that will be - // executed by runtime before any kernel from the program is 
executed. - - // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool - // This indicates that the program object contains non-trivial destructor(s) that will be - // executed by runtime when program is destroyed. - - // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT - cl_bool ctors_present; - size_t cl_bool_size; - error = clGetProgramInfo( - program, - CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, - sizeof(cl_bool), - static_cast(&ctors_present), - &cl_bool_size - ); - RETURN_ON_CL_ERROR(error, "clGetProgramInfo") - if(cl_bool_size != sizeof(cl_bool)) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool)); - } - if(ctor && ctors_present != CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1."); - } - else if(!ctor && ctors_present == CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0."); - } - - // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT - cl_bool dtors_present = 0; - error = clGetProgramInfo( - program, - CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, - sizeof(cl_bool), - static_cast(&ctors_present), - &cl_bool_size - ); - RETURN_ON_CL_ERROR(error, "clGetProgramInfo") - if(cl_bool_size != sizeof(cl_bool)) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool)); - } - if(dtor && dtors_present != CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1."); - } - else if(!dtor && dtors_present == CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0."); - } - - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_global_scope_ctors_dtors_present) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - int 
last_error = CL_SUCCESS; - // both present - last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, true); - CHECK_ERROR(last_error); - error |= last_error; - // dtor - last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, true); - CHECK_ERROR(last_error); - error |= last_error; - // ctor - last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, false); - CHECK_ERROR(last_error); - error |= last_error; - // none present - last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, false); - CHECK_ERROR(last_error); - error |= last_error; - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP diff --git a/test_conformance/clcpp/api/test_dtors.hpp b/test_conformance/clcpp/api/test_dtors.hpp deleted file mode 100644 index e04cbb1cdd..0000000000 --- a/test_conformance/clcpp/api/test_dtors.hpp +++ /dev/null @@ -1,559 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP -#define TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP - -#include -#include -#include -#include - -#include "../common.hpp" - -// TEST 1 -// Verify that destructor is executed. - -// How: destructor of struct dtor_test_class has a side effect: zeroing buffer. 
If values -// in buffer are not zeros after releasing program, destructor was not executed. - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * program_test_dtor_is_executed = - "__kernel void test_dtor_is_executed(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * program_test_dtor_is_executed = - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct - "struct dtor_test_class {\n" - // non-trivial dtor - // set all values in buffer to 0 - " ~dtor_test_class() {\n" - " for(ulong i = 0; i < size; i++)\n" - " buffer[i] = 0;\n" - " };\n" - " global_ptr buffer;\n" - " ulong size;\n" - "};\n" - // global scope program variable - "dtor_test_class global_var;\n" - - // values in output __MUST BE__ greater than 0 for the test to work - // correctly - "__kernel void test_dtor_is_executed(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - // set buffer and size in global var - " if(gid == 0){\n" - " global_var.buffer = output;\n" - " global_var.size = get_global_size(0);\n" - " }\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_global_scope_dtor_is_executed) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if 
defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_is_executed, "test_dtor_is_executed" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_is_executed, "test_dtor_is_executed", "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_is_executed, "test_dtor_is_executed" - ); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) - // values in output __MUST BE__ greater than 0 for the test to work correctly - std::vector output(count, cl_uint(0xbeefbeef)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel( - queue, kernel, - dim, NULL, work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - // Release kernel and program - // Dtor should be called now - error = clReleaseKernel(kernel); - RETURN_ON_CL_ERROR(error, "clReleaseKernel") - error = clReleaseProgram(program); - RETURN_ON_CL_ERROR(error, "clReleaseProgram") - - // Finish - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - - // Read output buffer - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, 
sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - error = -1; - CHECK_ERROR_MSG(error, "Test test_dtor_is_executed failed."); - } - - clReleaseMemObject(output_buffer); - return error; -} - -// TEST 2 -// Verify that multiple destructors, if present, are executed. Order between multiple -// destructors is undefined. -// Verify that each destructor is executed only once. - -// How: -// 0) dtor_test_class struct has a global pointer to a buffer, it's set by -// test_dtors_executed_once kernel. -// 1) Destructors have a side effect: each dtor writes to its part of the buffer. If all -// dtors are executed, all values in that buffer should be changed. -// 2) The first time destructors are executed, they set their parts of the buffer to zero. -// Next time to 1, next time to 2 etc. Since dtors should be executed only once, all -// values in that buffer should be equal to zero. - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * program_test_dtors_executed_once = - "__kernel void test_dtors_executed_once(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * program_test_dtors_executed_once = - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct - "struct dtor_test_class {\n" - // non-trivial dtor - // Set all values in range [start; end - 1] in buffer to counter. 
- // If dtor is executed only once (correct), all values in range - // [start; end - 1] in buffer should be equal to zero after releasing - // the program - " ~dtor_test_class() {\n" - " for(ulong i = start; i < end; i++){\n" - " buffer[i] = counter;\n" - " };\n" - " counter++;\n" - " };\n" - " global_ptr buffer;\n" - " ulong start;\n" - " ulong end;\n" - " ulong counter;\n" - "};\n" - // global scope program variables - "dtor_test_class global_var0;\n" - "dtor_test_class global_var1;\n" - "dtor_test_class global_var2;\n" - "dtor_test_class global_var3;\n" - - // values in output __MUST BE__ greater than 0 for the test to work correctly - "__kernel void test_dtors_executed_once(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - // set buffer and size in global var - " if(gid == 0){\n" - " ulong end = get_global_size(0) / 4;" - // global_var0 - " global_var0.buffer = output;\n" - " global_var0.start = 0;\n" - " global_var0.end = end;\n" - " global_var0.counter = 0;\n" - // global_var1 - " global_var1.buffer = output;\n" - " global_var1.start = end;\n" - " end += get_global_size(0) / 4;\n" - " global_var1.end = end;\n" - " global_var1.counter = 0;\n" - // global_var2 - " global_var2.buffer = output;\n" - " global_var2.start = end;\n" - " end += get_global_size(0) / 4;\n" - " global_var2.end = end;\n" - " global_var2.counter = 0;\n" - // global_var3 - " global_var3.buffer = output;\n" - " global_var3.start = end;\n" - " global_var3.end = get_global_size(0);\n" - " global_var3.counter = 0;\n" - " }\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_global_scope_dtors_executed_once) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT 
------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtors_executed_once, "test_dtors_executed_once" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtors_executed_once, "test_dtors_executed_once", "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtors_executed_once, "test_dtors_executed_once" - ); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) - // values in output __MUST BE__ greater than 0 for the test to work correctly - cl_uint init_value = cl_uint(0xbeefbeef); - std::vector output(count, init_value); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel( - queue, kernel, - dim, NULL, work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - - // Increments the program reference count. 
Twice - error = clRetainProgram(program); - RETURN_ON_CL_ERROR(error, "clRetainProgram") - error = clRetainProgram(program); - RETURN_ON_CL_ERROR(error, "clRetainProgram") - - // Should just decrement the program reference count. - error = clReleaseProgram(program); - RETURN_ON_CL_ERROR(error, "clReleaseProgram") - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - - // Should just decrement the program reference count. - error = clReleaseProgram(program); - RETURN_ON_CL_ERROR(error, "clReleaseProgram") - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - -#ifndef USE_OPENCLC_KERNELS - // At this point global scope variables should not be destroyed, - // values in output buffer should not be modified. - - // Read output buffer - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - for(auto& i : output) - { - if(i != init_value) - { - log_error("ERROR: Test test_global_scope_dtors_executed_once failed."); - log_error("\tDestructors were executed prematurely.\n"); - RETURN_ON_ERROR(-1) - } - } -#endif - - // Release kernel and program, destructors should be called now - error = clReleaseKernel(kernel); - RETURN_ON_CL_ERROR(error, "clReleaseKernel") - error = clReleaseProgram(program); - RETURN_ON_CL_ERROR(error, "clReleaseProgram") - - // Finish - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - - // Read output buffer - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - log_error("ERROR: Test test_global_scope_dtors_executed_once failed."); - // Maybe some dtors were not run? 
- for(auto& i : output) - { - if(i == init_value) - { - log_error("\tSome dtors were not executed."); - break; - } - } - log_error("\n"); - RETURN_ON_ERROR(-1) - } - - // Clean - clReleaseMemObject(output_buffer); - return error; -} - -// TEST3 -// Verify that ND-range during destructor execution is set to (1,1,1) - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * program_test_dtor_ndrange = - "__kernel void test_dtor_ndrange(global uint *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * program_test_dtor_ndrange = - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct - "struct dtor_test_class {\n" - // non-trivial dtor - // set all values in buffer to 0 only if ND-range is (1, 1, 1) - " ~dtor_test_class() {\n" - " if(check()){\n" - " for(ulong i = 0; i < size; i++)\n" - " buffer[i] = 0;\n" - " }\n" - " };\n" - // return true if the ND-range is (1, 1, 1); otherwise - false - " bool check() {\n" - " return (get_global_size(0) == 1)" - " && (get_global_size(1) == 1)" - " && (get_global_size(2) == 1);\n" - " }" - " ulong size;\n" - " global_ptr buffer;\n" - "};\n" - // global scope program variable - "dtor_test_class global_var;\n" - - // values in output __MUST BE__ greater than 0 for the test to work correctly - "__kernel void test_dtor_ndrange(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - // set buffer and size in global var - " if(gid == 0){\n" - " global_var.buffer = output;\n" - " global_var.size = get_global_size(0);\n" - " }\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_global_scope_dtor_ndrange) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - 
int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_ndrange, "test_dtor_ndrange" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_ndrange, "test_dtor_ndrange", "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - program_test_dtor_ndrange, "test_dtor_ndrange" - ); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) - // values in output __MUST BE__ greater than 0 for the test to work correctly - std::vector output(count, cl_uint(0xbeefbeef)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel( - queue, kernel, - dim, NULL, work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, 
"clEnqueueNDRangeKernel") - - // Release kernel and program - // Dtor should be called now - error = clReleaseKernel(kernel); - RETURN_ON_CL_ERROR(error, "clReleaseKernel") - error = clReleaseProgram(program); - RETURN_ON_CL_ERROR(error, "clReleaseProgram") - - // Finish - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - - // Read output buffer - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); - if(sum != 0) - { - error = -1; - CHECK_ERROR_MSG(error, "Test test_dtor_ndrange failed."); - } - - clReleaseMemObject(output_buffer); - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP diff --git a/test_conformance/clcpp/api/test_spec_consts.hpp b/test_conformance/clcpp/api/test_spec_consts.hpp deleted file mode 100644 index c403f4d9ee..0000000000 --- a/test_conformance/clcpp/api/test_spec_consts.hpp +++ /dev/null @@ -1,480 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP -#define TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP - -#include -#include -#include - -#include "../common.hpp" - -// TEST 1 -// Verify that if left unset the specialization constant defaults to the default value set in SPIR-V (zero). - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * kernel_test_spec_consts_defaults = - "__kernel void test_spec_consts_defaults(global int *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * kernel_test_spec_consts_defaults = - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "spec_constant spec1(0);\n" - "spec_constant spec2(0);\n" - "spec_constant spec3(0);\n" - "spec_constant spec4(0);\n" - "spec_constant spec5(0);\n" - "spec_constant spec6(0);\n" - "spec_constant spec7(0);\n" - "spec_constant spec8(0);\n" - "spec_constant spec9(0.0f);\n" - "#ifdef cl_khr_fp64\n" - "spec_constant spec10(0.0);\n" - "#endif\n" - "#ifdef cl_khr_fp16\n" - "spec_constant spec11(0.0h);\n" - "#endif\n" - "__kernel void test_spec_consts_defaults(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(get(spec1) != char(0)) result = 1;\n" - " if(get(spec2) != uchar(0)) result = 1;\n" - " if(get(spec3) != short(0)) result = 1;\n" - " if(get(spec4) != ushort(0)) result = 1;\n" - " if(get(spec5) != int(0)) result = 1;\n" - " if(get(spec6) != uint(0)) result = 1;\n" - " if(get(spec7) != long(0)) result = 1;\n" - " if(get(spec8) != ulong(0)) result = 1;\n" - " if(get(spec9) != float(0)) result = 1;\n" - "#ifdef cl_khr_fp64\n" - " if(get(spec10) != double(0)) result = 1;\n" - "#endif\n" - 
"#ifdef cl_khr_fp16\n" - " if(get(spec11) != half(0)) result = 1;\n" - "#endif\n" - " output[gid] = result;\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_spec_consts_defaults) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; - - std::string options = ""; - if(is_extension_available(device, "cl_khr_fp16")) - { - options += " -cl-fp16-enable"; - } - if(is_extension_available(device, "cl_khr_fp64")) - { - options += " -cl-fp64-enable"; - } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", "", false); - RETURN_ON_ERROR(error) -// Normal run -#else - // Spec constants are NOT set before clBuildProgram (called in create_opencl_kernel), so - // they all should default to the default value set in SPIR-V (zero). 
- error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == 1, output[0] == 1 - std::vector output(1, cl_int(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKerne") - - error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - // if output[0] != 0, then some spec constant(s) did not default to zero. - if(output[0] != 0) - { - RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_defaults failed, output[0]: %d.", output[0]) - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -// TEST 2 -// Verify that setting an existing specialization constant affects only -// the value of that constant and not of other specialization constants. 
- -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * kernel_test_spec_consts_many_constants = - "__kernel void test_spec_consts_many_constants(global int *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * kernel_test_spec_consts_many_constants = - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "spec_constant spec1(0);\n" - "spec_constant spec2(0);\n" - "spec_constant spec3(0);\n" - "__kernel void test_spec_consts_defaults(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " if(get(spec1) != int(-1024)) result += 1;\n" - " if(get(spec2) != int(0)) result += 2;\n" - " if(get(spec3) != int(1024)) result += 4;\n" - " output[gid] = result;\n" - "}\n" -; -#endif - -AUTO_TEST_CASE(test_spec_consts_many_constants) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif 
defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants", "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - // Create program - error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_many_constants); - RETURN_ON_ERROR(error) - - // Set specialization constants - - // clSetProgramSpecializationConstant( - // cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */,const void* /* spec_value */ - // ) - cl_int spec1 = -1024; - cl_int spec3 = 1024; - // Set spec1 - error = clSetProgramSpecializationConstant(program, cl_uint(1), sizeof(cl_int), static_cast(&spec1)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Specialization constant spec2 should default to zero - // Set spec3 - error = clSetProgramSpecializationConstant(program, cl_uint(3), sizeof(cl_int), static_cast(&spec3)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - - // Build program and create kernel - error = build_program_create_kernel_helper( - context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" - ); - RETURN_ON_ERROR(error) -#endif - - // host vector, size == 1, output[0] == 1 - std::vector output(1, cl_int(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); - 
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - // if output[0] != 0, then values of spec constants were incorrect - if(output[0] != 0) - { - RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_many_constants failed, output[0]: %d.", output[0]); - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -// TEST 3 -// Verify that the API correctly handles the size of a specialization constant by exercising -// the API for specialization constants of different types (int, bool, float, etc.) - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const char * kernel_test_spec_consts_different_types = - "__kernel void test_spec_consts_different_types(global int *output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " output[gid] = 0;\n" - "}\n" -; -#else -const char * kernel_test_spec_consts_different_types = - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "spec_constant spec1(0);\n" - "spec_constant spec2(0);\n" - "spec_constant spec3(0);\n" - "spec_constant spec4(0);\n" - "spec_constant spec5(0);\n" - "spec_constant spec6(0);\n" - "spec_constant spec7(0);\n" - "spec_constant spec8(0);\n" - "spec_constant spec9(0.0f);\n" - "#ifdef cl_khr_fp64\n" - "spec_constant spec10(0.0);\n" - "#endif\n" - "#ifdef cl_khr_fp16\n" - "spec_constant spec11(0.0h);\n" - "#endif\n" - "__kernel void test_spec_consts_different_types(global_ptr output)\n" - "{\n" - " ulong gid = get_global_id(0);\n" - " int result = 0;\n" - " 
if(get(spec1) != char(CHAR_MAX)) result += 1;\n" - " if(get(spec2) != uchar(UCHAR_MAX)) result += 2;\n" - " if(get(spec3) != short(SHRT_MAX)) result += 4;\n" - " if(get(spec4) != ushort(USHRT_MAX)) result += 8;\n" - " if(get(spec5) != int(INT_MAX)) result += 16;\n" - " if(get(spec6) != uint(UINT_MAX)) result += 32;\n" - " if(get(spec7) != long(LONG_MAX)) result += 64;\n" - " if(get(spec8) != ulong(ULONG_MAX)) result += 128;\n" - " if(get(spec9) != float(FLT_MAX)) result += 256;\n" - "#ifdef cl_khr_fp64\n" - " if(get(spec10) != double(DBL_MAX)) result += 512;\n" - "#endif\n" - "#ifdef cl_khr_fp16\n" - " if(get(spec11) != half(HALF_MAX)) result += 1024;\n" - "#endif\n" - " output[gid] = result;\n" - "}\n" -; -#endif - - -AUTO_TEST_CASE(test_spec_consts_different_types) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - - cl_mem output_buffer; - cl_program program; - cl_kernel kernel; - - size_t dim = 1; - size_t work_size[1]; - - std::string options = ""; - if(is_extension_available(device, "cl_khr_fp16")) - { - options += " -cl-fp16-enable"; - } - if(is_extension_available(device, "cl_khr_fp64")) - { - options += " -cl-fp64-enable"; - } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", options); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel(context, &program, &kernel, 
kernel_test_spec_consts_different_types, "test_spec_consts_different_types", "", false); - RETURN_ON_ERROR(error) -// Normal run -#else - // Create program - error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_different_types, options.c_str()); - RETURN_ON_ERROR(error) - - // Set specialization constants - cl_uint spec_id = 1; - - cl_char spec1 = CL_CHAR_MAX; - cl_uchar spec2 = CL_UCHAR_MAX; - cl_short spec3 = CL_SHRT_MAX; - cl_ushort spec4 = CL_USHRT_MAX; - cl_int spec5 = CL_INT_MAX; - cl_uint spec6 = CL_UINT_MAX; - cl_long spec7 = CL_LONG_MAX; - cl_ulong spec8 = CL_ULONG_MAX; - cl_float spec9 = CL_FLT_MAX; - cl_double spec10 = CL_DBL_MAX; - cl_half spec11 = CL_HALF_MAX; - - // Set spec1 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_char), static_cast(&spec1)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec2 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uchar), static_cast(&spec2)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec3 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_short), static_cast(&spec3)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec4 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ushort), static_cast(&spec4)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec5 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_int), static_cast(&spec5)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec6 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uint), static_cast(&spec6)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec7 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_long), static_cast(&spec7)); - RETURN_ON_CL_ERROR(error, 
"clSetProgramSpecializationConstant") - // Set spec8 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ulong), static_cast(&spec8)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec9 - error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_float), static_cast(&spec9)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - // Set spec10 - if(is_extension_available(device, "cl_khr_fp64")) - { - error = clSetProgramSpecializationConstant(program, cl_uint(10), sizeof(cl_double), static_cast(&spec10)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - // Set spec11 - if(is_extension_available(device, "cl_khr_fp16")) - { - error = clSetProgramSpecializationConstant(program, cl_uint(11), sizeof(cl_half), static_cast(&spec11)); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - - // Build program and create kernel - error = build_program_create_kernel_helper( - context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" - ); - RETURN_ON_ERROR(error) -#endif - - // Copy output to output_buffer, run kernel, copy output_buffer back to output, check result - - // host vector, size == 1, output[0] == 1 - std::vector output(1, cl_int(1)); - output_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = output.size(); - error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = 
clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - // if output[0] != 0, then some spec constants had incorrect values - if(output[0] != 0) - { - RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_different_types failed, output[0]: %d.", output[0]) - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP diff --git a/test_conformance/clcpp/atomics/CMakeLists.txt b/test_conformance/clcpp/atomics/CMakeLists.txt deleted file mode 100644 index 4fb4bfd198..0000000000 --- a/test_conformance/clcpp/atomics/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_ATOMICS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/atomics/atomic_fetch.hpp b/test_conformance/clcpp/atomics/atomic_fetch.hpp deleted file mode 100644 index 39a9948879..0000000000 --- a/test_conformance/clcpp/atomics/atomic_fetch.hpp +++ /dev/null @@ -1,308 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP -#define TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - - -const size_t atomic_bucket_size = 100; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_atomic_fetch(func_type func) -{ - std::string in1_value = "input[gid]"; - std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]"; - std::string function_call = "atomic_" + func.str() + "(&" + out1_value + ", " + in1_value + ")"; - return - "" + func.defs() + - "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global atomic_" + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_atomic_fetch(func_type func) -{ - std::string in1_value = "input[gid]"; - std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]"; - std::string function_call = func.str() + "(" + in1_value + ")"; - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," - "global_ptr() + ">[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " " + out1_value + "." 
+ function_call + ";\n" - "}\n"; -} -#endif - -template -bool verify_atomic_fetch(const std::vector &in, const std::vector &out, atomic_fetch op) -{ - for (size_t i = 0; i < out.size(); i++) - { - TYPE expected = op.init_out(); - for (size_t k = 0; k < atomic_bucket_size; k++) - { - const size_t in_i = i * atomic_bucket_size + k; - if (in_i >= in.size()) - break; - expected = op(expected, in[in_i]); - } - if (expected != out[i]) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - return true; -} - -template -int test_atomic_fetch_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, atomic_fetch op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef typename atomic_fetch::in_type TYPE; - - // Don't run test for unsupported types - if (!(type_supported(device))) - { - return CL_SUCCESS; - } - if (sizeof(TYPE) == 8 && - (!is_extension_available(device, "cl_khr_int64_base_atomics") || - !is_extension_available(device, "cl_khr_int64_extended_atomics"))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_atomic_fetch(op); - std::string kernel_name("test_"); kernel_name += op.str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = 
create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector input = generate_input(count, op.min1(), op.max1(), std::vector()); - std::vector output = generate_output((count - 1) / atomic_bucket_size + 1); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") - - const TYPE pattern = op.init_out(); - err = clEnqueueFillBuffer(queue, buffers[1], &pattern, sizeof(pattern), 0, sizeof(TYPE) * output.size(), 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueFillBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - err = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer") - - if (!verify_atomic_fetch(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s failed", op.str().c_str(), type_name().c_str()); - } - log_info("test_%s %s passed\n", op.str().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - - -template -struct atomic_fetch -{ - typedef TYPE in_type; - - std::string 
decl_str() - { - return type_name(); - } - - std::string defs() - { - std::string defs; - if (sizeof(TYPE) == 8) - { - defs += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; - defs += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; - } - return defs; - } - - std::string headers() - { - return "#include \n"; - } - - TYPE min1() - { - return 0; - } - - TYPE max1() - { - return 1000; - } -}; - - -#define DEF_ATOMIC_FETCH_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION, INIT_OUT) \ -template \ -struct CLASS_NAME : public atomic_fetch \ -{ \ - std::string str() \ - { \ - return #FUNC_NAME; \ - } \ - \ - TYPE init_out() \ - { \ - return INIT_OUT; \ - } \ - \ - TYPE operator()(const TYPE& x, const TYPE& y) \ - { \ - return HOST_FUNC_EXPRESSION; \ - } \ -}; - -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_add, fetch_add, x + y, 0) -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_sub, fetch_sub, x - y, (std::numeric_limits::max)()) - -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_and, fetch_and, x & y, (std::numeric_limits::max)()) -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_or, fetch_or, x | y, 0) -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_xor, fetch_xor, x ^ y, 0) - -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_max, fetch_max, (std::max)(x, y), 0) -DEF_ATOMIC_FETCH_FUNC(atomic_fetch_min, fetch_min, (std::min)(x, y), (std::numeric_limits::max)()) - -#undef DEF_ATOMIC_FETCH_FUNC - - -AUTO_TEST_CASE(test_atomic_fetch) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#define TEST_ATOMIC_MACRO(TEST_CLASS) \ - last_error = test_atomic_fetch_func( \ - device, context, queue, n_elems, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - - TEST_ATOMIC_MACRO((atomic_fetch_add())) - TEST_ATOMIC_MACRO((atomic_fetch_add())) - TEST_ATOMIC_MACRO((atomic_fetch_add())) - TEST_ATOMIC_MACRO((atomic_fetch_add())) - - TEST_ATOMIC_MACRO((atomic_fetch_sub())) - TEST_ATOMIC_MACRO((atomic_fetch_sub())) - 
TEST_ATOMIC_MACRO((atomic_fetch_sub())) - TEST_ATOMIC_MACRO((atomic_fetch_sub())) - - TEST_ATOMIC_MACRO((atomic_fetch_and())) - TEST_ATOMIC_MACRO((atomic_fetch_and())) - TEST_ATOMIC_MACRO((atomic_fetch_and())) - TEST_ATOMIC_MACRO((atomic_fetch_and())) - - TEST_ATOMIC_MACRO((atomic_fetch_or())) - TEST_ATOMIC_MACRO((atomic_fetch_or())) - TEST_ATOMIC_MACRO((atomic_fetch_or())) - TEST_ATOMIC_MACRO((atomic_fetch_or())) - - TEST_ATOMIC_MACRO((atomic_fetch_xor())) - TEST_ATOMIC_MACRO((atomic_fetch_xor())) - TEST_ATOMIC_MACRO((atomic_fetch_xor())) - TEST_ATOMIC_MACRO((atomic_fetch_xor())) - - TEST_ATOMIC_MACRO((atomic_fetch_max())) - TEST_ATOMIC_MACRO((atomic_fetch_max())) - TEST_ATOMIC_MACRO((atomic_fetch_max())) - TEST_ATOMIC_MACRO((atomic_fetch_max())) - - TEST_ATOMIC_MACRO((atomic_fetch_min())) - TEST_ATOMIC_MACRO((atomic_fetch_min())) - TEST_ATOMIC_MACRO((atomic_fetch_min())) - TEST_ATOMIC_MACRO((atomic_fetch_min())) - -#undef TEST_ATOMIC_MACRO - - if (error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP diff --git a/test_conformance/clcpp/attributes/CMakeLists.txt b/test_conformance/clcpp/attributes/CMakeLists.txt deleted file mode 100644 index 1b1c15aa4e..0000000000 --- a/test_conformance/clcpp/attributes/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_ATTRIBUTES) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/attributes/main.cpp b/test_conformance/clcpp/attributes/main.cpp deleted file mode 100644 index e731c00161..0000000000 --- a/test_conformance/clcpp/attributes/main.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_ivdep.hpp" -#include "test_max_size.hpp" -#include "test_required_num_sub_groups.hpp" - - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/attributes/test_ivdep.hpp b/test_conformance/clcpp/attributes/test_ivdep.hpp deleted file mode 100644 index 17b1f586ee..0000000000 --- a/test_conformance/clcpp/attributes/test_ivdep.hpp +++ /dev/null @@ -1,418 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP -#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP - -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_ivdep { - -enum class loop_kind -{ - for_loop, - while_loop, - do_loop -}; - -struct test_options -{ - loop_kind loop; - int ivdep_length; - int offset1; - int offset2; - int iter_count; - bool offset1_param; - bool offset2_param; - bool iter_count_param; - bool cond_in_header; - bool init_in_header; - bool incr_in_header; -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1); - std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2); - - std::string init = "i = 0"; - std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count)); - std::string incr = "i += 2"; - - std::stringstream s; - s << R"( - kernel void test(global int *a, global int *b, global int *c, int offset1, int offset2, int iter_count) - { - int i; - )"; - - // Loop #1 - if (!options.init_in_header) s << init << ";" << std::endl; - if (options.loop == loop_kind::for_loop) - s << "for (" << - (options.init_in_header ? init : "") << ";" << - (options.cond_in_header ? cond : "") << ";" << - (options.incr_in_header ? incr : "") << ")"; - else if (options.loop == loop_kind::while_loop) - s << "while (" << (options.cond_in_header ? 
cond : "true") << ")"; - else if (options.loop == loop_kind::do_loop) - s << "do"; - s << "{" << std::endl; - if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; - s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl; - if (!options.incr_in_header) s << incr << ";" << std::endl; - s << "}" << std::endl; - if (options.loop == loop_kind::do_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; - - // Loop #2 - if (!options.init_in_header) s << init << ";" << std::endl; - if (options.loop == loop_kind::for_loop) - s << "for (" << - (options.init_in_header ? init : "") << ";" << - (options.cond_in_header ? cond : "") << ";" << - (options.incr_in_header ? incr : "") << ")"; - else if (options.loop == loop_kind::while_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ")"; - else if (options.loop == loop_kind::do_loop) - s << "do"; - s << "{" << std::endl; - if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; - s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl; - if (!options.incr_in_header) s << incr << ";" << std::endl; - s << "}" << std::endl; - if (options.loop == loop_kind::do_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; - - s << "}" << std::endl; - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1); - std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2); - - std::string init = "i = 0"; - std::string cond = std::string("i < ") + (options.iter_count_param ? 
"iter_count" : std::to_string(options.iter_count)); - std::string incr = "i += 2"; - - std::stringstream s; - s << R"( - #include - #include - - using namespace cl; - )"; - s << R"( - kernel void test(global_ptr a, global_ptr b, global_ptr c, int offset1, int offset2, int iter_count) - { - int i; - )"; - - // Loop #1 - if (!options.init_in_header) s << init << ";" << std::endl; - if (options.ivdep_length > 0) s << "[[cl::ivdep]]" << std::endl; - if (options.loop == loop_kind::for_loop) - s << "for (" << - (options.init_in_header ? init : "") << ";" << - (options.cond_in_header ? cond : "") << ";" << - (options.incr_in_header ? incr : "") << ")"; - else if (options.loop == loop_kind::while_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ")"; - else if (options.loop == loop_kind::do_loop) - s << "do"; - s << "{" << std::endl; - if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; - s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl; - if (!options.incr_in_header) s << incr << ";" << std::endl; - s << "}" << std::endl; - if (options.loop == loop_kind::do_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; - - // Loop #2 - if (!options.init_in_header) s << init << ";" << std::endl; - if (options.ivdep_length > 0) s << "[[cl::ivdep(" << options.ivdep_length << ")]]" << std::endl; - if (options.loop == loop_kind::for_loop) - s << "for (" << - (options.init_in_header ? init : "") << ";" << - (options.cond_in_header ? cond : "") << ";" << - (options.incr_in_header ? incr : "") << ")"; - else if (options.loop == loop_kind::while_loop) - s << "while (" << (options.cond_in_header ? 
cond : "true") << ")"; - else if (options.loop == loop_kind::do_loop) - s << "do"; - s << "{" << std::endl; - if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; - s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl; - if (!options.incr_in_header) s << incr << ";" << std::endl; - s << "}" << std::endl; - if (options.loop == loop_kind::do_loop) - s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; - - s << "}" << std::endl; - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - const size_t count = 100; - const size_t global_size = 1; - - std::vector a(count); - std::vector b(count); - std::vector c(count); - for (size_t i = 0; i < count; i++) - { - a[i] = 0; - b[i] = i; - c[i] = 1; - } - - cl_mem a_buffer; - a_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | 
CL_MEM_COPY_HOST_PTR, - sizeof(int) * count, static_cast(a.data()), &error - ); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - cl_mem b_buffer; - b_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - sizeof(int) * count, static_cast(b.data()), &error - ); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - cl_mem c_buffer; - c_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - sizeof(int) * count, static_cast(c.data()),&error - ); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 3, sizeof(cl_int), &options.offset1); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 4, sizeof(cl_int), &options.offset2); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 5, sizeof(cl_int), &options.iter_count); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector a_output(count); - error = clEnqueueReadBuffer( - queue, a_buffer, CL_TRUE, - 0, sizeof(int) * count, - static_cast(a_output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - for (int i = 0; i < options.iter_count; i += 2) - { - a[i + options.offset1] = b[i + options.offset1] * c[i + options.offset1]; - } - - for (int i = 0; i < options.iter_count; i += 2) - { - a[i + options.offset2] = a[i] + b[i]; - } - - for (size_t i = 0; i < count; i++) - { - const int value = a_output[i]; - const int expected = a[i]; - if (value != expected) - { - RETURN_ON_ERROR_MSG(-1, - "Test 
failed. Element %lu: %d should be: %d", - i, value, expected - ); - } - } - - clReleaseMemObject(a_buffer); - clReleaseMemObject(b_buffer); - clReleaseMemObject(c_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -const std::vector> params{ - std::make_tuple( -1, 0, 0 ), - std::make_tuple( -1, 3, 4 ), - std::make_tuple( 1, 1, 1 ), - std::make_tuple( 3, 4, 2 ), - std::make_tuple( 3, 4, 3 ), - std::make_tuple( 8, 10, 7 ), - std::make_tuple( 16, 16, 16 ) -}; -const std::vector iter_counts{ { 1, 4, 12, 40 } }; - -AUTO_TEST_CASE(test_ivdep_for) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - for (auto param : params) - for (auto iter_count : iter_counts) - for (bool offset1_param : { false, true }) - for (bool offset2_param : { false, true }) - for (bool iter_count_param : { false, true }) - for (bool cond_in_header : { false, true }) - for (bool init_in_header : { false, true }) - for (bool incr_in_header : { false, true }) - { - test_options options; - options.loop = loop_kind::for_loop; - options.ivdep_length = std::get<0>(param); - options.offset1 = std::get<1>(param); - options.offset2 = std::get<2>(param); - options.iter_count = iter_count; - options.offset1_param = offset1_param; - options.offset2_param = offset2_param; - options.iter_count_param = iter_count_param; - options.cond_in_header = cond_in_header; - options.init_in_header = init_in_header; - options.incr_in_header = incr_in_header; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -AUTO_TEST_CASE(test_ivdep_while) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - for (auto param : params) - for (auto iter_count : iter_counts) - for (bool offset1_param : { false, true }) - for (bool offset2_param : { false, true }) - for (bool iter_count_param : { false, true }) - for (bool 
cond_in_header : { false, true }) - { - test_options options; - options.loop = loop_kind::while_loop; - options.ivdep_length = std::get<0>(param); - options.offset1 = std::get<1>(param); - options.offset2 = std::get<2>(param); - options.iter_count = iter_count; - options.offset1_param = offset1_param; - options.offset2_param = offset2_param; - options.iter_count_param = iter_count_param; - options.cond_in_header = cond_in_header; - options.init_in_header = false; - options.incr_in_header = false; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -AUTO_TEST_CASE(test_ivdep_do) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - for (auto param : params) - for (auto iter_count : iter_counts) - for (bool offset1_param : { false, true }) - for (bool offset2_param : { false, true }) - for (bool iter_count_param : { false, true }) - for (bool cond_in_header : { false, true }) - { - test_options options; - options.loop = loop_kind::do_loop; - options.ivdep_length = std::get<0>(param); - options.offset1 = std::get<1>(param); - options.offset2 = std::get<2>(param); - options.iter_count = iter_count; - options.offset1_param = offset1_param; - options.offset2_param = offset2_param; - options.iter_count_param = iter_count_param; - options.cond_in_header = cond_in_header; - options.init_in_header = false; - options.incr_in_header = false; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP diff --git a/test_conformance/clcpp/attributes/test_max_size.hpp b/test_conformance/clcpp/attributes/test_max_size.hpp deleted file mode 100644 index 15e7ead6bd..0000000000 --- a/test_conformance/clcpp/attributes/test_max_size.hpp +++ /dev/null @@ -1,266 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP -#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_max_size { - -enum class address_space -{ - constant, - local -}; - -enum class param_kind -{ - ptr_type, // constant_ptr - ptr, // constant* - ref // constant& -}; - -const param_kind param_kinds[] = -{ - param_kind::ptr_type, - param_kind::ptr, - param_kind::ref -}; - -struct test_options -{ - address_space space; - int max_size; - bool spec_const; - param_kind kind; - bool array; -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << "kernel void test("; - s << (options.space == address_space::constant ? 
"constant" : "local"); - s << " int2 *input) { }" << std::endl; - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::string type_str = "int2"; - if (options.array) - type_str += "[]"; - - std::stringstream s; - s << "#include " << std::endl; - - if (options.spec_const) - { - s << "#include " << std::endl; - s << "cl::spec_constant max_size_spec{ 1234567890 };" << std::endl; - } - - s << "kernel void test("; - s << "[[cl::max_size(" << (options.spec_const ? "max_size_spec" : std::to_string(options.max_size)) << ")]] "; - s << (options.space == address_space::constant ? "cl::constant" : "cl::local"); - if (options.kind == param_kind::ptr_type) - s << "_ptr<" << type_str << ">"; - else if (options.kind == param_kind::ptr) - s << "<" << type_str << ">*"; - else if (options.kind == param_kind::ref) - s << "<" << type_str << ">&"; - s << " input) { }" << std::endl; - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - const char 
*source_c_str = source.c_str(); - error = create_openclcpp_program(context, &program, 1, &source_c_str, ""); - RETURN_ON_ERROR(error) - - if (options.spec_const) - { - error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_int), &options.max_size); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - - error = build_program_create_kernel_helper( - context, &program, &kernel, 1, &source_c_str, kernel_name.c_str() - ); - RETURN_ON_ERROR(error) -#endif - - const int max_size = options.max_size; - const int sizes[] = { - 1, - max_size / 2, - max_size, - max_size + 1, - max_size * 2 - }; - - for (int size : sizes) - { - cl_mem const_buffer; - if (options.space == address_space::constant) - { - const_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &const_buffer); - // Check the status later (depending on size and max_size values) - } - else if (options.space == address_space::local) - { - error = clSetKernelArg(kernel, 0, size, NULL); - // Check the status later (depending on size and max_size values) - } - - if (size <= max_size) - { - // Correct value, must not fail - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - const size_t global_size = 123; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clFinish(queue); - RETURN_ON_CL_ERROR(error, "clFinish") - } - else - { - // Incorrect value, must fail - if (error != CL_MAX_SIZE_RESTRICTION_EXCEEDED) - { - RETURN_ON_ERROR_MSG(-1, - "clSetKernelArg must fail with CL_MAX_SIZE_RESTRICTION_EXCEEDED," - " but returned %s (%d)", get_cl_error_string(error).c_str(), error - ); - } - } - - if (options.space == address_space::constant) - { - error = clReleaseMemObject(const_buffer); - RETURN_ON_CL_ERROR(error, "clReleaseMemObject") - } - } - - clReleaseKernel(kernel); - 
clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_max_size_constant) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - cl_ulong max_size; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(max_size), &max_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - for (bool spec_const : { false, true }) - for (auto kind : param_kinds) - for (bool array : { false, true }) - { - test_options options; - options.space = address_space::constant; - options.max_size = max_size / 2; - options.spec_const = spec_const; - options.kind = kind; - options.array = array; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -AUTO_TEST_CASE(test_max_size_local) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - cl_ulong max_size; - error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_size), &max_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - for (bool spec_const : { false, true }) - for (auto kind : param_kinds) - for (bool array : { false, true }) - { - test_options options; - options.space = address_space::local; - options.max_size = max_size / 2; - options.spec_const = spec_const; - options.kind = kind; - options.array = array; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP diff --git a/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp deleted file mode 100644 index 2380eafe39..0000000000 --- a/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp +++ /dev/null @@ -1,285 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP -#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP - -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_required_num_sub_groups { - -struct test_options -{ - size_t num_sub_groups; - bool spec_const; - size_t max_count; - size_t num_tests; -}; - -struct output_type -{ - cl_ulong num_sub_groups; - cl_ulong enqueued_num_sub_groups; -}; - -const std::string source_common = R"( -struct output_type -{ - ulong num_sub_groups; - ulong enqueued_num_sub_groups; -}; -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << source_common; - s << R"( - #pragma OPENCL EXTENSION cl_khr_subgroups : enable - - kernel void test(global struct output_type *output) - { - const ulong gid = get_global_linear_id(); - output[gid].num_sub_groups = get_num_sub_groups(); - output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); - } - )"; - - return s.str(); -} -#else -std::string 
generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #include - #include - using namespace cl; - )"; - - if (options.spec_const) - { - s << "#include " << std::endl; - s << "cl::spec_constant num_sub_groups_spec{ 1234567890 };" << std::endl; - } - - s << source_common << std::endl; - s << "[[cl::required_num_sub_groups(" << (options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups)) << ")]]"; - s << R"( - kernel void test(global_ptr output) - { - const ulong gid = get_global_linear_id(); - output[gid].num_sub_groups = get_num_sub_groups(); - output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); - } - )"; - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - if (!is_extension_available(device, "cl_khr_subgroups")) - { - log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. 
Skipping tests.\n"); - return CL_SUCCESS; - } -#endif - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - const char *source_c_str = source.c_str(); - error = create_openclcpp_program(context, &program, 1, &source_c_str, ""); - RETURN_ON_ERROR(error) - - if (options.spec_const) - { - cl_uint spec_num_sub_groups = static_cast(options.num_sub_groups); - error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - - error = build_program_create_kernel_helper( - context, &program, &kernel, 1, &source_c_str, kernel_name.c_str() - ); - RETURN_ON_ERROR(error) -#endif - - size_t compile_num_sub_groups; - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, - 0, NULL, - sizeof(size_t), &compile_num_sub_groups, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - if (compile_num_sub_groups != options.num_sub_groups) - { - RETURN_ON_ERROR_MSG(-1, - "CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %lu, got %lu)", - options.num_sub_groups, 
compile_num_sub_groups - ) - } - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution count_dis(1, options.max_count); - - for (size_t test = 0; test < options.num_tests; test++) - { - for (size_t dim = 1; dim <= 3; dim++) - { - size_t global_size[3] = { 1, 1, 1 }; - size_t count = count_dis(gen); - std::uniform_int_distribution global_size_dis(1, static_cast(pow(count, 1.0 / dim))); - for (size_t d = 0; d < dim; d++) - { - global_size[d] = global_size_dis(gen); - } - count = global_size[0] * global_size[1] * global_size[2]; - - size_t local_size[3] = { 1, 1, 1 }; - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, - sizeof(size_t), &options.num_sub_groups, - sizeof(size_t) * dim, local_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1) - { - RETURN_ON_ERROR_MSG(-1, - "CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value" - ) - } - - size_t sub_group_count_for_ndrange; - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, - sizeof(size_t) * dim, local_size, - sizeof(size_t), &sub_group_count_for_ndrange, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - if (sub_group_count_for_ndrange != options.num_sub_groups) - { - RETURN_ON_ERROR_MSG(-1, - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %lu, got %lu)", - options.num_sub_groups, sub_group_count_for_ndrange - ) - } - - const char pattern = 0; - error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); - 
RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") - - error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(output_type) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - for (size_t gid = 0; gid < count; gid++) - { - const output_type &o = output[gid]; - - if (o.enqueued_num_sub_groups != options.num_sub_groups) - { - RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups") - } - if (o.num_sub_groups > options.num_sub_groups) - { - RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value") - } - } - } - } - - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_required_num_sub_groups) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - - cl_uint max_num_sub_groups; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - for (bool spec_const : { false, true }) - for (size_t num_sub_groups = 1; num_sub_groups <= max_num_sub_groups; num_sub_groups++) - { - test_options options; - options.spec_const = spec_const; - options.num_sub_groups = num_sub_groups; - options.num_tests = 100; - options.max_count = num_elements; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP diff --git a/test_conformance/clcpp/common.hpp b/test_conformance/clcpp/common.hpp deleted file mode 100644 index e06200265a..0000000000 --- a/test_conformance/clcpp/common.hpp +++ 
/dev/null @@ -1,51 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP -#define TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP - -#include -#include -#include -#include -#include -#include - -// harness framework -#include "harness/compat.h" -#include "harness/testHarness.h" -#include "harness/errorHelpers.h" -#include "harness/kernelHelpers.h" - -// autotest -#include "autotest/autotest.hpp" - -// utils_common -#include "utils_common/is_vector_type.hpp" -#include "utils_common/scalar_type.hpp" -#include "utils_common/make_vector_type.hpp" -#include "utils_common/type_name.hpp" -#include "utils_common/type_supported.hpp" -#include "utils_common/vector_size.hpp" -#include "utils_common/kernel_helpers.hpp" -#include "utils_common/errors.hpp" -#include "utils_common/string.hpp" - -size_t get_uniform_global_size(size_t global_size, size_t local_size) -{ - return static_cast(std::ceil(static_cast(global_size) / local_size)) * local_size; -} - -#endif // TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP diff --git a/test_conformance/clcpp/common_funcs/CMakeLists.txt b/test_conformance/clcpp/common_funcs/CMakeLists.txt deleted file mode 100644 index 5e4d8b035e..0000000000 --- a/test_conformance/clcpp/common_funcs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_COMMON_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git 
a/test_conformance/clcpp/common_funcs/common_funcs.hpp b/test_conformance/clcpp/common_funcs/common_funcs.hpp deleted file mode 100644 index d6f8c89704..0000000000 --- a/test_conformance/clcpp/common_funcs/common_funcs.hpp +++ /dev/null @@ -1,417 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include -#include - -// floatn clamp(floatn x, floatn min, floatn max) (only scalars) -template -struct common_func_clamp : public ternary_func -{ - std::string str() - { - return "clamp"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "All types must be the same" - ); - return (std::min)((std::max)(x, minval), maxval); - } - - IN2 min2() - { - return (std::numeric_limits::min)(); - } - - IN2 max2() - { - return (std::numeric_limits::max)() / IN2(4000.0f); - } - - IN3 min3() - { - return IN3(1) + ((std::numeric_limits::max)() / IN3(4000.0f)); - } - - IN3 max3() - { - return (std::numeric_limits::max)() / IN3(2000.0f); - } - - float ulp() - { - return 0.0f; - } -}; - -// floatn degrees(floatn t) -template -struct common_func_degrees : 
public unary_func -{ - std::string str() - { - return "degrees"; - } - - std::string headers() - { - return "#include \n"; - } - - REFERENCE operator()(const IN1& x) - { - static_assert( - std::is_same::value, - "All types must be the same" - ); - return (REFERENCE(180.0) / CL_M_PI) * static_cast(x); - } - - float ulp() - { - return 2.5f; - } -}; - -// floatn max(floatn x, floatn y) -template -struct common_func_max : public binary_func -{ - std::string str() - { - return "max"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value && std::is_same::value, - "All types must be the same" - ); - return (std::max)(x, y); - } - - float ulp() - { - return 0.0f; - } -}; - -// floatn min(floatn x, floatn y) -template -struct common_func_min : public binary_func -{ - std::string str() - { - return "min"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value && std::is_same::value, - "All types must be the same" - ); - return (std::min)(x, y); - } - - float ulp() - { - return 0.0f; - } -}; - -// floatn mix(floatn x, floatn y, floatn a); -template -struct common_func_mix : public ternary_func -{ - std::string str() - { - return "mix"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y, const IN3& a) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "All types must be the same" - ); - return static_cast(x) + ((static_cast(y) - static_cast(x)) * static_cast(a)); - } - - IN3 min3() - { - return IN3(0.0f + CL_FLT_EPSILON); - } - - IN3 max3() - { - return IN3(1.0f - CL_FLT_EPSILON); - } - - bool use_ulp() - { - return false; - } -}; - -// floatn radians(floatn t) -template -struct common_func_radians : public unary_func -{ - std::string str() - { - return "radians"; - } - - 
std::string headers() - { - return "#include \n"; - } - - REFERENCE operator()(const IN1& x) - { - static_assert( - std::is_same::value, - "All types must be the same" - ); - return (CL_M_PI / REFERENCE(180.0)) * static_cast(x); - } - - float ulp() - { - return 2.5f; - } -}; - -// floatn step(floatn edge, floatn x) -template -struct common_func_step : public binary_func -{ - std::string str() - { - return "step"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& edge, const IN2& x) - { - static_assert( - std::is_same::value && std::is_same::value, - "All types must be the same" - ); - if(x < edge) - return OUT1(0.0f); - return OUT1(1.0f); - } - - float ulp() - { - return 0.0f; - } -}; - -// floatn smoothstep(floatn edge0, floatn edge1, floatn x); -template -struct common_func_smoothstep : public ternary_func -{ - std::string str() - { - return "smoothstep"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& edge0, const IN2& edge1, const IN3& x) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "All types must be the same" - ); - if(x <= edge0) - { - return OUT1(0.0f); - } - if(x >= edge1) - { - return OUT1(1.0f); - } - OUT1 t = (x - edge0) / (edge1 - edge0); - t = t * t * (3.0f - 2.0f * t); - return t; - } - - // edge0 must be < edge1 - IN1 min1() - { - return (std::numeric_limits::min)(); - } - - IN1 max1() - { - return (std::numeric_limits::max)() / IN1(8000.0f); - } - - IN2 min2() - { - return IN3(1) + ((std::numeric_limits::max)() / IN2(4000.0f)); - } - - IN2 max2() - { - return (std::numeric_limits::max)() / IN2(2000.0f); - } - - bool use_ulp() - { - return false; - } -}; - -// floatn sign(floatn t) -template -struct common_func_sign : public unary_func -{ - std::string str() - { - return "sign"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x) - { - static_assert( - 
std::is_same::value, - "All types must be the same" - ); - if(x == IN1(-0.0f)) - { - return IN1(-0.0f); - } - if(x == IN1(+0.0f)) - { - return IN1(+0.0f); - } - if(x > IN1(0.0f)) - { - return IN1(1.0f); - } - return IN1(-1.0f); - } - - bool use_ulp() - { - return false; - } - - float ulp() - { - return 0.0f; - } - - std::vector in_special_cases() - { - return { -0.0f, +0.0f }; - } -}; - -AUTO_TEST_CASE(test_common_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // floatn clamp(floatn x, floatn min, floatn max) - TEST_TERNARY_FUNC_MACRO((common_func_clamp())) - - // floatn degrees(floatn t) - TEST_UNARY_FUNC_MACRO((common_func_degrees())) - - // floatn max(floatn x, floatn y); - TEST_BINARY_FUNC_MACRO((common_func_max())) - - // floatn min(floatn x, floatn y); - TEST_BINARY_FUNC_MACRO((common_func_min())) - - // floatn mix(floatn x, floatn y, floatn a); - TEST_TERNARY_FUNC_MACRO((common_func_mix())) - - // floatn radians(floatn t) - TEST_UNARY_FUNC_MACRO((common_func_radians())) - - // floatn step(floatn edge, floatn x) - TEST_BINARY_FUNC_MACRO((common_func_step())) - - // floatn smoothstep(floatn edge0, floatn edge1, floatn x) - TEST_TERNARY_FUNC_MACRO((common_func_smoothstep())) - - // floatn sign(floatn t); - TEST_UNARY_FUNC_MACRO((common_func_sign())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP diff --git a/test_conformance/clcpp/common_funcs/main.cpp b/test_conformance/clcpp/common_funcs/main.cpp deleted file mode 100644 index 4a6277a393..0000000000 --- a/test_conformance/clcpp/common_funcs/main.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include - -#include "../common.hpp" - -#include "common_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - // Check if cl_float (float) and cl_double (double) fulfill the requirements of - // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." - ); - } - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." - ); - } - - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/convert/CMakeLists.txt b/test_conformance/clcpp/convert/CMakeLists.txt deleted file mode 100644 index 9f69feabb4..0000000000 --- a/test_conformance/clcpp/convert/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_CONVERT) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/convert/convert_cast.hpp b/test_conformance/clcpp/convert/convert_cast.hpp deleted file mode 100644 index 81fcca63cc..0000000000 --- a/test_conformance/clcpp/convert/convert_cast.hpp +++ /dev/null @@ -1,309 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP -#define TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include - - -enum class rounding_mode -{ - def, - /*rte, not implemented here */ - rtz, - rtp, - rtn -}; - -enum class saturate { def, off, on }; - -std::string rounding_mode_name(rounding_mode rmode) -{ - switch (rmode) - { - case rounding_mode::rtz: return "rtz"; - case rounding_mode::rtp: return "rtp"; - case rounding_mode::rtn: return "rtn"; - default: return ""; - } -} - -std::string saturate_name(saturate smode) -{ - switch (smode) - { - case saturate::off: return "off"; - case saturate::on: return "on"; - default: return ""; - } -} - -template -T clamp(T x, T a, T b) -{ - return (std::min)(b, (std::max)(a, x)); -} - -template -struct convert_cast : public unary_func -{ - static_assert(vector_size::value == vector_size::value, "The operand and result type must have the same number of elements"); - - typedef typename scalar_type::type in_scalar_type; - typedef typename scalar_type::type out_scalar_type; - - in_scalar_type in_min; - in_scalar_type in_max; - rounding_mode rmode; - saturate smode; - - convert_cast(in_scalar_type min, in_scalar_type max, rounding_mode rmode, saturate smode) - : in_min(min), in_max(max), rmode(rmode), smode(smode) - { - } - - std::string str() - { - return "convert_cast"; - } - - std::string 
headers() - { - return "#include \n"; - } - - IN1 min1() - { - return detail::def_limit(in_min); - } - - IN1 max1() - { - return detail::def_limit(in_max); - } - - OUT1 operator()(const IN1& x) - { - OUT1 y; - for (size_t i = 0; i < vector_size::value; i++) - { - in_scalar_type v; - if (smode == saturate::on) - v = clamp(x.s[i], - static_cast((std::numeric_limits::min)()), - static_cast((std::numeric_limits::max)()) - ); - else - v = x.s[i]; - - if (std::is_integral::value) - { - switch (rmode) - { - case rounding_mode::rtp: - y.s[i] = static_cast(std::ceil(v)); - break; - case rounding_mode::rtn: - y.s[i] = static_cast(std::floor(v)); - break; - default: - y.s[i] = static_cast(v); - } - } - else - { - y.s[i] = static_cast(v); - } - } - return y; - } -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_convert_cast(func_type func) -{ - std::string in1_value = "input[gid]"; - std::string function_call = "convert_" + type_name(); - if (func.smode == saturate::on) - function_call += "_sat"; - if (func.rmode != rounding_mode::def) - function_call += "_" + rounding_mode_name(func.rmode); - function_call += "(" + in1_value + ")"; - return - "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_convert_cast(func_type func) -{ - std::string headers = func.headers(); - std::string in1_value = "input[gid]"; - std::string function_call = "convert_cast<" + type_name(); - if (func.rmode != rounding_mode::def) - function_call += ", rounding_mode::" + 
rounding_mode_name(func.rmode); - if (func.smode != saturate::def) - function_call += ", saturate::" + saturate_name(func.smode); - function_call += ">(" + in1_value + ")"; - return - "" + func.defs() + - "" + headers + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#endif - -template -int test_convert_cast_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, convert_cast_op op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int error; - - typedef typename convert_cast_op::in_type INPUT; - typedef typename convert_cast_op::out_type OUTPUT; - - // Don't run test for unsupported types - if (!(type_supported(device) && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_convert_cast(op); - std::string kernel_name("test_"); kernel_name += op.str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(error) -#else - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(error) 
-#endif - - std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); - std::vector output = generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = count; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - if (!verify_unary(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); - } - log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - - -AUTO_TEST_CASE(test_convert_cast) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#define TEST_CONVERT_CAST_MACRO(OP) \ - last_error = test_convert_cast_func( \ - device, context, queue, n_elems, OP \ - ); \ - CHECK_ERROR(last_error) \ - error 
|= last_error; - - // No-op - TEST_CONVERT_CAST_MACRO((convert_cast(-100.0f, +100.0f, rounding_mode::rtn, saturate::def))) - TEST_CONVERT_CAST_MACRO((convert_cast(0, 255, rounding_mode::def, saturate::def))) - - // int to int - TEST_CONVERT_CAST_MACRO((convert_cast(40000, 40000, rounding_mode::def, saturate::on))) - TEST_CONVERT_CAST_MACRO((convert_cast(0, 127, rounding_mode::def, saturate::off))) - TEST_CONVERT_CAST_MACRO((convert_cast(-100, 100, rounding_mode::def, saturate::off))) - - // float to int - TEST_CONVERT_CAST_MACRO((convert_cast(-100.0f, +400.0f, rounding_mode::def, saturate::on))) - TEST_CONVERT_CAST_MACRO((convert_cast(-127.0, +127.0, rounding_mode::rtp, saturate::off))) - TEST_CONVERT_CAST_MACRO((convert_cast(-1000.0f, +10000.0f, rounding_mode::rtp, saturate::on))) - TEST_CONVERT_CAST_MACRO((convert_cast(-10000.0f, +70000.0f, rounding_mode::rtn, saturate::on))) - - // int to float - TEST_CONVERT_CAST_MACRO((convert_cast(0, 12345, rounding_mode::def, saturate::def))) - TEST_CONVERT_CAST_MACRO((convert_cast(-1000000, +1000000, rounding_mode::rtz, saturate::def))) - -#undef TEST_CONVERT_CAST_MACRO - - if (error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP diff --git a/test_conformance/clcpp/device_queue/CMakeLists.txt b/test_conformance/clcpp/device_queue/CMakeLists.txt deleted file mode 100644 index 0e1b2ee436..0000000000 --- a/test_conformance/clcpp/device_queue/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_DEVICE_QUEUE) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/device_queue/test_enqueue.hpp b/test_conformance/clcpp/device_queue/test_enqueue.hpp deleted file mode 100644 index f5d4e6dc28..0000000000 --- a/test_conformance/clcpp/device_queue/test_enqueue.hpp +++ /dev/null @@ -1,699 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP -#define TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP - -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_enqueue { - -struct test_options -{ - int test; -}; - -struct output_type -{ - cl_int enqueue_kernel1_success; - cl_int enqueue_kernel2_success; - cl_int enqueue_kernel3_success; - cl_int enqueue_marker_success; - cl_int event1_is_valid; - cl_int event2_is_valid; - cl_int event3_is_valid; - cl_int user_event1_is_valid; - cl_int user_event2_is_valid; - cl_int values[10000]; -}; - -const std::string source_common = R"( -struct output_type -{ - int enqueue_kernel1_success; - int enqueue_kernel2_success; - int enqueue_kernel3_success; - int enqueue_marker_success; - int event1_is_valid; - int event2_is_valid; - int event3_is_valid; - int user_event1_is_valid; - int user_event2_is_valid; - int values[10000]; -}; -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << source_common; - if (options.test == 0) - { - s 
<< R"( - kernel void test(queue_t queue, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_kernel2_success = 1; - output->enqueue_kernel3_success = 1; - output->enqueue_marker_success = 1; - output->event2_is_valid = 1; - output->event3_is_valid = 1; - output->user_event1_is_valid = 1; - output->user_event2_is_valid = 1; - - queue_t default_queue = get_default_queue(); - - ndrange_t ndrange1 = ndrange_1D(get_global_size(0)); - clk_event_t event1; - int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange1, 0, NULL, &event1, - ^{ - const ulong gid = get_global_id(0); - output->values[gid] = 1; - }); - output->enqueue_kernel1_success = status1 == CLK_SUCCESS; - output->event1_is_valid = is_valid_event(event1); - - release_event(event1); - } - )"; - } - else if (options.test == 1) - { - s << R"( - kernel void test(queue_t queue, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_kernel3_success = 1; - output->enqueue_marker_success = 1; - output->event3_is_valid = 1; - output->user_event1_is_valid = 1; - output->user_event2_is_valid = 1; - - queue_t default_queue = get_default_queue(); - - ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); - clk_event_t event1; - int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1, - ^{ - const ulong gid = get_global_id(0); - output->values[gid * 2] = 1; - }); - output->enqueue_kernel1_success = status1 == CLK_SUCCESS; - output->event1_is_valid = is_valid_event(event1); - - ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1); - clk_event_t event2; - int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 1, &event1, &event2, - ^{ - const ulong gid = get_global_id(0); - output->values[(gid - 1) * 2 + 1] = 1; - }); - output->enqueue_kernel2_success = status2 == CLK_SUCCESS; - 
output->event2_is_valid = is_valid_event(event2); - - release_event(event1); - release_event(event2); - } - )"; - } - else if (options.test == 2) - { - s << R"( - kernel void test(queue_t queue, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_marker_success = 1; - output->event3_is_valid = 1; - output->enqueue_kernel3_success = 1; - - queue_t default_queue = get_default_queue(); - - clk_event_t user_event1 = create_user_event(); - retain_event(user_event1); - output->user_event1_is_valid = is_valid_event(user_event1); - - ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); - clk_event_t event1; - int status1 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange1, 1, &user_event1, &event1, - ^{ - const ulong gid = get_global_id(0); - output->values[gid * 2] = 1; - }); - output->enqueue_kernel1_success = status1 == CLK_SUCCESS; - output->event1_is_valid = is_valid_event(event1); - release_event(user_event1); - - clk_event_t user_event2 = create_user_event(); - output->user_event2_is_valid = is_valid_event(user_event2); - - clk_event_t events[2]; - events[0] = user_event2; - events[1] = user_event1; - - ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, get_local_size(0)); - clk_event_t event2; - int status2 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange2, 2, events, &event2, - ^(local void *p0, local void *p1, local void *p2) { - const ulong gid = get_global_id(0); - const ulong lid = get_local_id(0); - local int2 *l0 = (local int2 *)p0; - local int *l1 = (local int *)p1; - local int *l2 = (local int *)p2; - l1[get_local_size(0) - lid - 1] = gid > 0 ? 
1 : 0; - work_group_barrier(CLK_LOCAL_MEM_FENCE); - if (lid < 5) l0[lid] = (int2)(3, 4); - if (lid < 3) l2[lid] = 5; - work_group_barrier(CLK_LOCAL_MEM_FENCE); - output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0])); - }, sizeof(int2) * 5, sizeof(int) * get_local_size(0), sizeof(int) * 3); - output->enqueue_kernel2_success = status2 == CLK_SUCCESS; - output->event2_is_valid = is_valid_event(event2); - - set_user_event_status(user_event1, CL_COMPLETE); - set_user_event_status(user_event2, CL_COMPLETE); - - release_event(user_event1); - release_event(user_event2); - release_event(event1); - release_event(event2); - } - )"; - } - else if (options.test == 3) - { - s << R"( - kernel void test(queue_t queue, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->user_event2_is_valid = 1; - - queue_t default_queue = get_default_queue(); - - ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); - clk_event_t event1; - int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1, - ^{ - const ulong gid = get_global_id(0); - output->values[gid * 2] = 20; - }); - output->enqueue_kernel1_success = status1 == CLK_SUCCESS; - output->event1_is_valid = is_valid_event(event1); - - ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1); - clk_event_t event2; - int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 0, NULL, &event2, - ^{ - const ulong gid = get_global_id(0); - output->values[(gid - 1) * 2 + 1] = 20; - }); - output->enqueue_kernel2_success = status2 == CLK_SUCCESS; - output->event2_is_valid = is_valid_event(event2); - - clk_event_t user_event1 = create_user_event(); - output->user_event1_is_valid = is_valid_event(user_event1); - - clk_event_t events[3]; - events[0] = event2; - events[1] = user_event1; - events[2] = event1; - - clk_event_t event3; - int status3 = enqueue_marker(queue, 3, events, &event3); - 
output->enqueue_marker_success = status3 == CLK_SUCCESS; - output->event3_is_valid = is_valid_event(event3); - - int status4 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange_1D(get_global_size(0)), 1, &event3, NULL, - ^{ - const ulong gid = get_global_id(0); - output->values[gid] /= 20; - }); - output->enqueue_kernel3_success = status4 == CLK_SUCCESS; - - set_user_event_status(user_event1, CL_COMPLETE); - - release_event(user_event1); - release_event(event1); - release_event(event2); - release_event(event3); - } - )"; - } - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #include - #include - #include - #include - #include - using namespace cl; - )"; - - s << source_common; - if (options.test == 0) - { - s << R"( - kernel void test(device_queue queue, global *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_kernel2_success = 1; - output->enqueue_kernel3_success = 1; - output->enqueue_marker_success = 1; - output->event2_is_valid = 1; - output->event3_is_valid = 1; - output->user_event1_is_valid = 1; - output->user_event2_is_valid = 1; - - device_queue default_queue = get_default_device_queue(); - - ndrange ndrange1(get_global_size(0)); - event event1; - enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 0, nullptr, &event1, ndrange1, - [](global *output) { - const ulong gid = get_global_id(0); - output->values[gid] = 1; - }, output); - output->enqueue_kernel1_success = status1 == enqueue_status::success; - output->event1_is_valid = event1.is_valid(); - - event1.release(); - } - )"; - } - else if (options.test == 1) - { - s << R"( - kernel void test(device_queue queue, global *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_kernel3_success = 1; - output->enqueue_marker_success = 1; - output->event3_is_valid = 1; - output->user_event1_is_valid = 1; - 
output->user_event2_is_valid = 1; - - device_queue default_queue = get_default_device_queue(); - - ndrange ndrange1(get_global_size(0) / 2); - event event1; - enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1, - [](global *output) { - const ulong gid = get_global_id(0); - output->values[gid * 2] = 1; - }, output); - output->enqueue_kernel1_success = status1 == enqueue_status::success; - output->event1_is_valid = event1.is_valid(); - - ndrange ndrange2(1, get_global_size(0) / 2, 1); - event event2; - enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &event1, &event2, ndrange2, - [](global *output) { - const ulong gid = get_global_id(0); - output->values[(gid - 1) * 2 + 1] = 1; - }, output); - output->enqueue_kernel2_success = status2 == enqueue_status::success; - output->event2_is_valid = event2.is_valid(); - - event1.release(); - event2.release(); - } - )"; - } - else if (options.test == 2) - { - s << R"( - kernel void test(device_queue queue, global *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->enqueue_marker_success = 1; - output->event3_is_valid = 1; - output->enqueue_kernel3_success = 1; - - device_queue default_queue = get_default_device_queue(); - - event user_event1 = make_user_event(); - user_event1.retain(); - output->user_event1_is_valid = user_event1.is_valid(); - - ndrange ndrange1(get_global_size(0) / 2); - event event1; - enqueue_status status1 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &user_event1, &event1, ndrange1, - [](global *output){ - const ulong gid = get_global_id(0); - output->values[gid * 2] = 1; - }, output); - output->enqueue_kernel1_success = status1 == enqueue_status::success; - output->event1_is_valid = event1.is_valid(); - user_event1.release(); - - event user_event2 = make_user_event(); - output->user_event2_is_valid = user_event2.is_valid(); - - event events[2]; - events[0] = 
user_event2; - events[1] = user_event1; - - ndrange ndrange2(1, get_global_size(0) / 2, get_local_size(0)); - event event2; - enqueue_status status2 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 2, events, &event2, ndrange2, - [](global *output, local_ptr l0, local_ptr l1, local_ptr l2) { - const ulong gid = get_global_id(0); - const ulong lid = get_local_id(0); - l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0; - work_group_barrier(mem_fence::local); - if (lid < 5) l0[lid] = int2(3, 4); - if (lid < 3) l2[lid] = 5; - work_group_barrier(mem_fence::local); - output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0])); - }, output, local_ptr::size_type(5), local_ptr::size_type(get_local_size(0)), local_ptr::size_type(3)); - output->enqueue_kernel2_success = status2 == enqueue_status::success; - output->event2_is_valid = event2.is_valid(); - - user_event1.set_status(event_status::complete); - user_event2.set_status(event_status::complete); - - user_event1.release(); - user_event2.release(); - event1.release(); - event2.release(); - } - )"; - } - else if (options.test == 3) - { - s << R"( - kernel void test(device_queue queue, global *output) - { - const ulong gid = get_global_id(0); - - if (gid != 0) - return; - - output->user_event2_is_valid = 1; - - device_queue default_queue = get_default_device_queue(); - - ndrange ndrange1(get_global_size(0) / 2); - event event1; - enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1, - [](global *output) { - const ulong gid = get_global_id(0); - output->values[gid * 2] = 20; - }, output); - output->enqueue_kernel1_success = status1 == enqueue_status::success; - output->event1_is_valid = event1.is_valid(); - - ndrange ndrange2(1, get_global_size(0) / 2, 1); - event event2; - enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 0, nullptr, &event2, ndrange2, - [](global *output) { - const ulong gid = get_global_id(0); - 
output->values[(gid - 1) * 2 + 1] = 20; - }, output); - output->enqueue_kernel2_success = status2 == enqueue_status::success; - output->event2_is_valid = event2.is_valid(); - - event user_event1 = make_user_event(); - output->user_event1_is_valid = user_event1.is_valid(); - - event events[3]; - events[0] = event2; - events[1] = user_event1; - events[2] = event1; - - event event3; - enqueue_status status3 = queue.enqueue_marker(3, events, &event3); - output->enqueue_marker_success = status3 == enqueue_status::success; - output->event3_is_valid = event3.is_valid(); - - enqueue_status status4 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 1, &event3, nullptr, ndrange(get_global_size(0)), - [](global *output) { - const ulong gid = get_global_id(0); - output->values[gid] /= 20; - }, output); - output->enqueue_kernel3_success = status4 == enqueue_status::success; - - user_event1.set_status(event_status::complete); - - user_event1.release(); - event1.release(); - event2.release(); - event3.release(); - } - )"; - } - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, 
&program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - cl_uint max_queues; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint), &max_queues, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - cl_uint max_events; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint), &max_events, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - cl_command_queue device_queue1 = NULL; - cl_command_queue device_queue2 = NULL; - - cl_queue_properties queue_properties1[] = - { - CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, - 0 - }; - device_queue1 = clCreateCommandQueueWithProperties(context, device, queue_properties1, &error); - RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties") - - if (max_queues > 1) - { - cl_queue_properties queue_properties2[] = - { - CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, - 0 - }; - device_queue2 = clCreateCommandQueueWithProperties(context, device, queue_properties2, &error); - RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties") - } - - cl_mem output_buffer; - output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(cl_command_queue), device_queue2 != NULL ? 
&device_queue2 : &device_queue1); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - const char pattern = 0; - error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type), 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") - - size_t max_work_group_size; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - const size_t local_size = (std::min)((size_t)256, max_work_group_size); - const size_t global_size = 10000 / local_size * local_size; - const size_t count = global_size; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - output_type output; - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(output_type), - static_cast(&output), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - if (!output.enqueue_kernel1_success) - { - RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") - } - if (!output.enqueue_kernel2_success) - { - RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") - } - if (!output.enqueue_kernel3_success) - { - RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") - } - if (!output.enqueue_marker_success) - { - RETURN_ON_ERROR_MSG(-1, "enqueue_marker did not succeed") - } - if (!output.event1_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "event1 is not valid") - } - if (!output.event2_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "event2 is not valid") - } - if (!output.event3_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "event3 is not valid") - } - if (!output.user_event1_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "user_event1 is not valid") - } - if (!output.user_event2_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "user_event2 is 
not valid") - } - - for (size_t i = 0; i < count; i++) - { - const cl_int result = output.values[i]; - const cl_int expected = 1; - - if (result != expected) - { - RETURN_ON_ERROR_MSG(-1, - "kernel did not return correct value. Expected: %s, got: %s", - format_value(expected).c_str(), format_value(result).c_str() - ) - } - } - - clReleaseMemObject(output_buffer); - clReleaseCommandQueue(device_queue1); - if (device_queue2 != NULL) - clReleaseCommandQueue(device_queue2); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_enqueue_one_kernel) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.test = 0; - return test(device, context, queue, options); -} - -AUTO_TEST_CASE(test_enqueue_two_kernels) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.test = 1; - return test(device, context, queue, options); -} - -AUTO_TEST_CASE(test_enqueue_user_events_and_locals) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.test = 2; - return test(device, context, queue, options); -} - -AUTO_TEST_CASE(test_enqueue_marker) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.test = 3; - return test(device, context, queue, options); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP diff --git a/test_conformance/clcpp/funcs_test_utils.hpp b/test_conformance/clcpp/funcs_test_utils.hpp deleted file mode 100644 index e839231ca5..0000000000 --- a/test_conformance/clcpp/funcs_test_utils.hpp +++ /dev/null @@ -1,72 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP -#define TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP - -// This file contains helper classes and functions for testing various unary, binary -// and ternary OpenCL functions (for example cl::abs(x) or cl::abs_diff(x, y)), -// as well as other helper functions/classes. - -#include "common.hpp" - -#define TEST_UNARY_FUNC_MACRO(TEST_CLASS) \ - last_error = test_unary_func( \ - device, context, queue, n_elems, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -#define TEST_BINARY_FUNC_MACRO(TEST_CLASS) \ - last_error = test_binary_func( \ - device, context, queue, n_elems, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -#define TEST_TERNARY_FUNC_MACRO(TEST_CLASS) \ - last_error = test_ternary_func( \ - device, context, queue, n_elems, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -#include "utils_test/compare.hpp" -#include "utils_test/generate_inputs.hpp" - -// HOWTO: -// -// unary_func, binary_func, ternary_func - base classes wrapping OpenCL functions that -// you want to test. 
-// -// To create a wrapper class for given function, you need to create a class derived from correct -// base class (unary_func, binary_func, ternary_func), and define: -// -// * std::string str() method which should return class name in OpenCL ("abs", "abs_diff"), -// * operator(x), operator(x, y) or operator(x,y,z) depending on arity of the function you wish -// to test, method should work exactly as the tested function works in OpenCL -// * if it's needed you can overload min1, max1, min2, max2, min3, max3 methods with returns min -// and max values that can be generated for given input (function argument) [required for vec -// arguments], -// * if you want to use vector arguments (for example: cl_int2, cl_ulong16), you should look at -// how int_func_clamp<> is implemented in integer_funcs/numeric_funcs.hpp. -// -// To see how you should use class you've just created see AUTO_TEST_CASE(test_int_numeric_funcs) -// in integer_funcs/numeric_funcs.hpp. -#include "utils_test/unary.hpp" -#include "utils_test/binary.hpp" -#include "utils_test/ternary.hpp" - -#endif // TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/CMakeLists.txt b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt deleted file mode 100644 index 25d05ed14c..0000000000 --- a/test_conformance/clcpp/geometric_funcs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_GEOMETRIC_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp deleted file mode 100644 index c179728889..0000000000 --- a/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp +++ /dev/null @@ -1,229 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include - -// float fast_distance(float4 p0, float4 p1); -struct geometric_func_fast_distance : public binary_func -{ - - std::string str() - { - return "fast_distance"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_float4& p0, const cl_float4& p1) - { - cl_double r = 0.0f; - cl_double t; - for(size_t i = 0; i < 4; i++) - { - t = static_cast(p0.s[i]) - static_cast(p1.s[i]); - r += t * t; - } - return std::sqrt(r); - } - - cl_float4 min1() - { - return detail::def_limit(-512.0f); - } - - cl_float4 max1() - { - return detail::def_limit(512.0f); - } - - cl_float4 min2() - { - return detail::def_limit(-512.0f); - } - - cl_float4 max2() - { - return detail::def_limit(512.0f); - } - - cl_double delta(const cl_float4& p0, const cl_float4& p1, const cl_float& expected) - { - (void) p0; (void) p1; - return 0.01f * expected; - } - - float ulp() - { - return - 8192.0f + // error in sqrt - (1.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f); // cumulative error for additions - } -}; - -// float fast_length(float4 p); -struct geometric_func_fast_length : public unary_func -{ - std::string str() - { - return "fast_length"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_float4& p) - { - cl_double r = 0.0f; - for(size_t i = 0; i < 4; 
i++) - { - r += static_cast(p.s[i]) * static_cast(p.s[i]); - } - return std::sqrt(r); - } - - cl_float4 min1() - { - return detail::def_limit(-512.0f); - } - - cl_float4 max1() - { - return detail::def_limit(512.0f); - } - - cl_double delta(const cl_float4& p, const cl_float& expected) - { - (void) p; - return 0.01f * expected; - } - - float ulp() - { - return - 8192.0f + // error in sqrt - 0.5f * // effect on e of taking sqrt( x + e ) - ((0.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f)); // cumulative error for additions - } -}; - -// float4 fast_normalize(float4 p); -struct geometric_func_fast_normalize : public unary_func -{ - std::string str() - { - return "fast_normalize"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float4 operator()(const cl_float4& p) - { - cl_double t = 0.0f; - cl_float4 r; - for(size_t i = 0; i < 4; i++) - { - t += static_cast(p.s[i]) * static_cast(p.s[i]); - } - - if(t == 0.0f) - { - for(size_t i = 0; i < 4; i++) - { - r.s[i] = 0.0f; - } - return r; - } - - t = std::sqrt(t); - for(size_t i = 0; i < 4; i++) - { - r.s[i] = static_cast(p.s[i]) / t; - } - return r; - } - - cl_float4 min1() - { - return detail::def_limit(-512.0f); - } - - cl_float4 max1() - { - return detail::def_limit(512.0f); - } - - std::vector in_special_cases() - { - return { - {0.0f, 0.0f, 0.0f, 0.0f} - }; - } - - - cl_double4 delta(const cl_float4& p, const cl_float4& expected) - { - (void) p; - auto e = detail::make_value(0.01f); - return detail::multiply(e, expected); - } - - float ulp() - { - return - 8192.5f + // error in rsqrt + error in multiply - (0.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f); // cumulative error for additions - } -}; - -AUTO_TEST_CASE(test_fast_geometric_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // float fast_distance(float4 p0, float4 p1) - 
TEST_BINARY_FUNC_MACRO((geometric_func_fast_distance())) - - // float fast_length(float4 p) - TEST_UNARY_FUNC_MACRO((geometric_func_fast_length())) - - // float4 fast_normalize(float4 p) - TEST_UNARY_FUNC_MACRO((geometric_func_fast_normalize())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp deleted file mode 100644 index 561f9e9bd1..0000000000 --- a/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp +++ /dev/null @@ -1,389 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include - -// float4 cross(float4 p0, float4 p1) -struct geometric_func_cross : public binary_func -{ - geometric_func_cross(cl_device_id device) - { - // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product - this->m_delta = 3.0f * CL_FLT_EPSILON; - // RTZ devices accrue approximately double the amount of error per operation. Allow for that. 
- if(get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO) - { - this->m_delta *= 2.0f; - } - } - - std::string str() - { - return "cross"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float4 operator()(const cl_float4& p0, const cl_float4& p1) - { - cl_float4 r; - r.s[0] = (p0.s[1] * p1.s[2]) - (p0.s[2] * p1.s[1]); - r.s[1] = (p0.s[2] * p1.s[0]) - (p0.s[0] * p1.s[2]); - r.s[2] = (p0.s[0] * p1.s[1]) - (p0.s[1] * p1.s[0]); - r.s[3] = 0.0f; - return r; - } - - cl_float4 max1() - { - return detail::def_limit(1000.0f); - } - - cl_float4 max2() - { - return detail::def_limit(1000.0f); - } - - cl_float4 min1() - { - return detail::def_limit(-1000.0f); - } - - cl_float4 min2() - { - return detail::def_limit(-1000.0f); - } - - bool use_ulp() - { - return false; - } - - cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected) - { - (void) p0; (void) p1; - auto e = detail::make_value(m_delta); - return detail::multiply(e, expected); - } - -private: - cl_double m_delta; -}; - -// float dot(float4 p0, float4 p1); -struct geometric_func_dot : public binary_func -{ - - std::string str() - { - return "dot"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_float4& p0, const cl_float4& p1) - { - cl_float r; - r = p0.s[0] * p1.s[0]; - r += p0.s[1] * p1.s[1]; - r += p0.s[2] * p1.s[2]; - r += p0.s[3] * p1.s[3]; - return r; - } - - cl_float4 max1() - { - return detail::def_limit(1000.0f); - } - - cl_float4 max2() - { - return detail::def_limit(1000.0f); - } - - cl_float4 min1() - { - return detail::def_limit(-1000.0f); - } - - cl_float4 min2() - { - return detail::def_limit(-1000.0f); - } - - bool use_ulp() - { - return false; - } - - cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected) - { - (void) p0; (void) p1; - return expected * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON); - } -}; - -// float distance(float4 p0, float4 p1); -struct geometric_func_distance : 
public binary_func -{ - - std::string str() - { - return "distance"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_float4& p0, const cl_float4& p1) - { - cl_double r = 0.0f; - cl_double t; - for(size_t i = 0; i < 4; i++) - { - t = static_cast(p0.s[i]) - static_cast(p1.s[i]); - r += t * t; - } - return std::sqrt(r); - } - - cl_float4 max1() - { - return detail::def_limit(1000.0f); - } - - cl_float4 max2() - { - return detail::def_limit(1000.0f); - } - - cl_float4 min1() - { - return detail::def_limit(-1000.0f); - } - - cl_float4 min2() - { - return detail::def_limit(-1000.0f); - } - - float ulp() - { - return - 3.0f + // error in sqrt - (1.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f); // cumulative error for additions - } -}; - -// float length(float4 p); -struct geometric_func_length : public unary_func -{ - - std::string str() - { - return "length"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_float4& p) - { - cl_double r = 0.0f; - for(size_t i = 0; i < 4; i++) - { - r += static_cast(p.s[i]) * static_cast(p.s[i]); - } - return std::sqrt(r); - } - - cl_float4 max1() - { - return detail::def_limit(1000.0f); - } - - cl_float4 min1() - { - return detail::def_limit(-1000.0f); - } - - float ulp() - { - return - 3.0f + // error in sqrt - 0.5f * // effect on e of taking sqrt( x + e ) - ((0.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f)); // cumulative error for additions - } -}; - -// float4 normalize(float4 p); -struct geometric_func_normalize : public unary_func -{ - std::string str() - { - return "normalize"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float4 operator()(const cl_float4& p) - { - cl_double t = 0.0f; - cl_float4 r; - - // normalize( v ) returns a vector full of NaNs if any element is a NaN. 
- for(size_t i = 0; i < 4; i++) - { - if((std::isnan)(p.s[i])) - { - for(size_t j = 0; j < 4; j++) - { - r.s[j] = p.s[i]; - } - return r; - } - } - - // normalize( v ) for which any element in v is infinite shall proceed as - // if the elements in v were replaced as follows: - // for( i = 0; i < sizeof(v) / sizeof(v[0] ); i++ ) - // v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v [i]; - for(size_t i = 0; i < 4; i++) - { - if((std::isinf)(p.s[i])) - { - for(size_t j = 0; j < 4; j++) - { - r.s[j] = (std::isinf)(p.s[j]) ? (std::copysign)(1.0, p.s[j]) : 0.0 * p.s[j]; - } - r = (*this)(r); - return r; - } - } - - for(size_t i = 0; i < 4; i++) - { - t += static_cast(p.s[i]) * static_cast(p.s[i]); - } - - // normalize( v ) returns v if all elements of v are zero. - if(t == 0.0f) - { - for(size_t i = 0; i < 4; i++) - { - r.s[i] = 0.0f; - } - return r; - } - - t = std::sqrt(t); - for(size_t i = 0; i < 4; i++) - { - r.s[i] = static_cast(p.s[i]) / t; - } - - return r; - } - - cl_float4 max1() - { - return detail::def_limit(1000.0f); - } - - cl_float4 min1() - { - return detail::def_limit(-1000.0f); - } - - std::vector in_special_cases() - { - return { - {0.0f, 0.0f, 0.0f, 0.0f}, - {std::numeric_limits::infinity(), 0.0f, 0.0f, 0.0f}, - { - std::numeric_limits::infinity(), - std::numeric_limits::infinity(), - std::numeric_limits::infinity(), - std::numeric_limits::infinity() - }, - { - std::numeric_limits::infinity(), - 1.0f, - 0.0f, - std::numeric_limits::quiet_NaN() - }, - {-1.0f, -1.0f, 0.0f,-300.0f} - }; - } - - float ulp() - { - return - 2.5f + // error in rsqrt + error in multiply - (0.5f * 4.0f) + // cumulative error for multiplications - (0.5f * 3.0f); // cumulative error for additions - } -}; - -AUTO_TEST_CASE(test_geometric_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // float4 cross(float4 p0, float4 p1) - TEST_BINARY_FUNC_MACRO((geometric_func_cross(device))) 
- - // float dot(float4 p0, float4 p1) - TEST_BINARY_FUNC_MACRO((geometric_func_dot())) - - // float distance(float4 p0, float4 p1) - TEST_BINARY_FUNC_MACRO((geometric_func_distance())) - - // float length(float4 p) - TEST_UNARY_FUNC_MACRO((geometric_func_length())) - - // float4 normalize(float4 p) - TEST_UNARY_FUNC_MACRO((geometric_func_normalize())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/main.cpp b/test_conformance/clcpp/geometric_funcs/main.cpp deleted file mode 100644 index ed35805c95..0000000000 --- a/test_conformance/clcpp/geometric_funcs/main.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include - -#include "../common.hpp" - -#include "geometric_funcs.hpp" -#include "fast_geometric_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - // Check if cl_float (float) and cl_double (double) fulfill the requirements of - // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." 
- ); - } - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." - ); - } - - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/images/CMakeLists.txt b/test_conformance/clcpp/images/CMakeLists.txt deleted file mode 100644 index 3c92ecd7df..0000000000 --- a/test_conformance/clcpp/images/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_IMAGES) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/images/common.hpp b/test_conformance/clcpp/images/common.hpp deleted file mode 100644 index 957d266dcf..0000000000 --- a/test_conformance/clcpp/images/common.hpp +++ /dev/null @@ -1,195 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP - -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include "../harness/imageHelpers.h" - - -namespace detail -{ - -template -struct channel_info; - -template<> -struct channel_info -{ - typedef cl_char channel_type; - typedef cl_int4 element_type; - static std::string function_suffix() { return "i"; } - - channel_type channel_min() { return (std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_short channel_type; - typedef cl_int4 element_type; - static std::string function_suffix() { return "i"; } - - channel_type channel_min() { return (std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_int channel_type; - typedef cl_int4 element_type; - static std::string function_suffix() { return "i"; } - - channel_type channel_min() { return (std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_uchar channel_type; - typedef cl_uint4 element_type; - static std::string function_suffix() { return "ui"; } - - channel_type channel_min() { return (std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_ushort channel_type; - typedef cl_uint4 element_type; - static std::string function_suffix() { return "ui"; } - - channel_type channel_min() { return (std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_uint channel_type; - typedef cl_uint4 element_type; - static std::string function_suffix() { return "ui"; } - - channel_type channel_min() { return 
(std::numeric_limits::min)(); } - channel_type channel_max() { return (std::numeric_limits::max)(); } -}; - -template<> -struct channel_info -{ - typedef cl_float channel_type; - typedef cl_float4 element_type; - static std::string function_suffix() { return "f"; } - - channel_type channel_min() { return -1e-3f; } - channel_type channel_max() { return +1e+3f; } -}; - -template -struct image_info; - -template<> -struct image_info -{ - static std::string image_type_name() { return "image1d"; } - static std::string coord_accessor() { return "x"; } -}; - -template<> -struct image_info -{ - static std::string image_type_name() { return "image2d"; } - static std::string coord_accessor() { return "xy"; } -}; - -template<> -struct image_info -{ - static std::string image_type_name() { return "image3d"; } -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - static std::string coord_accessor() { return "xyzw"; } -#else - static std::string coord_accessor() { return "xyz"; } -#endif -}; - -} // namespace - -template -struct image_test_base : - detail::channel_info, - detail::image_info -{ }; - -// Create image_descriptor (used by harness/imageHelpers functions) -image_descriptor create_image_descriptor(cl_image_desc &image_desc, cl_image_format *image_format) -{ - image_descriptor image_info; - image_info.width = image_desc.image_width; - image_info.height = image_desc.image_height; - image_info.depth = image_desc.image_depth; - image_info.arraySize = image_desc.image_array_size; - image_info.rowPitch = image_desc.image_row_pitch; - image_info.slicePitch = image_desc.image_slice_pitch; - image_info.format = image_format; - image_info.buffer = image_desc.mem_object; - image_info.type = image_desc.image_type; - image_info.num_mip_levels = image_desc.num_mip_levels; - return image_info; -} - -const std::vector get_channel_orders(cl_device_id device) -{ - // According to "Minimum List of Supported Image Formats" of OpenCL specification: - return { CL_R, CL_RG, CL_RGBA }; 
-} - -bool is_test_supported(cl_device_id device) -{ - // Check for image support - if (checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED) - { - log_info("SKIPPED: Device does not support images. Skipping test.\n"); - return false; - } - return true; -} - -// Checks if x is equal to y. -template -inline bool are_equal(const type& x, - const type& y) -{ - for(size_t i = 0; i < vector_size::value; i++) - { - if(!(x.s[i] == y.s[i])) - { - return false; - } - } - return true; -} - -#endif // TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP diff --git a/test_conformance/clcpp/images/main.cpp b/test_conformance/clcpp/images/main.cpp deleted file mode 100644 index bbda559d97..0000000000 --- a/test_conformance/clcpp/images/main.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "../common.hpp" - -#include "test_read.hpp" -#include "test_sample.hpp" -#include "test_write.hpp" - -// FIXME: To use certain functions in test_common/harness/imageHelpers.h -// (for example, generate_random_image_data()), the tests are required to declare -// the following variable (hangover from code specific to Apple's implementation): - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/images/test_read.hpp b/test_conformance/clcpp/images/test_read.hpp deleted file mode 100644 index 3bc7b5efb2..0000000000 --- a/test_conformance/clcpp/images/test_read.hpp +++ /dev/null @@ -1,307 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP -#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP - -#include -#include -#include -#include - -#include "common.hpp" - - -namespace test_images_read { - -template -struct image_test : image_test_base -{ - cl_channel_order channel_order; - - image_test(cl_channel_order channel_order) : - channel_order(channel_order) - { } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string generate_source() - { - std::stringstream s; - s << R"( - typedef )" << type_name() << R"( element_type; - - kernel void test( - read_only )" << image_test::image_type_name() << R"(_t img, - const global int4 *coords, - global element_type *output - ) { - const ulong gid = get_global_linear_id(); - - output[gid] = read_image)" << image_test::function_suffix() << - "(img, coords[gid]." 
<< image_test::coord_accessor() << R"(); - } - )"; - - return s.str(); - } -#else - std::string generate_source() - { - std::stringstream s; - s << R"( - #include - #include - #include - #include - using namespace cl; - )"; - - s << R"( - typedef )" << type_name() << R"( element_type; - - kernel void test( - const )" << image_test::image_type_name() << R"( img, - const global_ptr coords, - global_ptr output - ) { - const ulong gid = get_global_linear_id(); - - output[gid] = img.read(coords[gid].)" << image_test::coord_accessor() << R"(); - } - )"; - - return s.str(); - } -#endif - - int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) - { - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - using element_type = typename image_test::element_type; - using coord_type = cl_int4; - using scalar_element_type = typename scalar_type::type; - using channel_type = typename image_test::channel_type; - - cl_image_format image_format; - 
image_format.image_channel_order = channel_order; - image_format.image_channel_data_type = ChannelType; - - const size_t pixel_size = get_pixel_size(&image_format); - const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); - - cl_image_desc image_desc; - image_desc.image_type = ImageType; - if (ImageType == CL_MEM_OBJECT_IMAGE1D) - { - image_desc.image_width = 2048; - image_desc.image_height = 1; - image_desc.image_depth = 1; - } - else if (ImageType == CL_MEM_OBJECT_IMAGE2D) - { - image_desc.image_width = 256; - image_desc.image_height = 256; - image_desc.image_depth = 1; - } - else if (ImageType == CL_MEM_OBJECT_IMAGE3D) - { - image_desc.image_width = 64; - image_desc.image_height = 64; - image_desc.image_depth = 64; - } - image_desc.image_array_size = 0; - image_desc.image_row_pitch = image_desc.image_width * pixel_size; - image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; - image_desc.num_mip_levels = 0; - image_desc.num_samples = 0; - image_desc.mem_object = NULL; - - image_descriptor image_info = create_image_descriptor(image_desc, &image_format); - - std::vector image_values = generate_input( - image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, - image_test::channel_min(), image_test::channel_max(), - std::vector() - ); - - const size_t count = num_elements; - - std::vector coords = generate_input( - count, - detail::make_value(0), - coord_type { - static_cast(image_desc.image_width - 1), - static_cast(image_desc.image_height - 1), - static_cast(image_desc.image_depth - 1), - 0 - }, - std::vector() - ); - - cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &image_format, &image_desc, static_cast(image_values.data()), &error); - RETURN_ON_CL_ERROR(error, "clCreateImage") - - cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(coord_type) * count, static_cast(coords.data()), 
&error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(element_type) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - const size_t global_size = count; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(element_type) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - for (size_t i = 0; i < count; i++) - { - const coord_type c = coords[i]; - const element_type result = output[i]; - - element_type expected; - read_image_pixel(static_cast(image_values.data()), &image_info, - c.s[0], c.s[1], c.s[2], - expected.s); - - if (!are_equal(result, expected)) - { - RETURN_ON_ERROR_MSG(-1, - "Reading from coordinates %s failed. 
Expected: %s, got: %s", - format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() - ); - } - } - - clReleaseMemObject(img); - clReleaseMemObject(coords_buffer); - clReleaseMemObject(output_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; - } -}; - -template -int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - if (!is_test_supported(device)) - return CL_SUCCESS; - - int error = CL_SUCCESS; - - for (auto channel_order : get_channel_orders(device)) - { - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - } - - return error; -} - - -AUTO_TEST_CASE(test_images_read_1d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_read_2d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_read_3d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -} // namespace - -#endif // 
TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP diff --git a/test_conformance/clcpp/images/test_sample.hpp b/test_conformance/clcpp/images/test_sample.hpp deleted file mode 100644 index a96a563a32..0000000000 --- a/test_conformance/clcpp/images/test_sample.hpp +++ /dev/null @@ -1,363 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP -#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP - -#include -#include -#include -#include - -#include "common.hpp" - - -namespace test_images_sample { - -enum class sampler_source -{ - param, - program_scope -}; - -const sampler_source sampler_sources[] = { sampler_source::param, sampler_source::program_scope }; - -template -struct image_test : image_test_base -{ - cl_channel_order channel_order; - sampler_source source; - - image_test(cl_channel_order channel_order, sampler_source source) : - channel_order(channel_order), - source(source) - { } - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string generate_source() - { - std::stringstream s; - s << R"( - typedef )" << type_name() << R"( element_type; - )"; - - std::string sampler; - 
if (source == sampler_source::program_scope) - { - s << R"( - constant sampler_t sampler_program_scope = CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE; - )"; - sampler = "sampler_program_scope"; - } - else if (source == sampler_source::param) - { - sampler = "sampler_param"; - } - - s << R"( - kernel void test( - read_only )" << image_test::image_type_name() << R"(_t img, - const global int4 *coords, - global element_type *output, - sampler_t sampler_param - ) { - const ulong gid = get_global_linear_id(); - - output[gid] = read_image)" << image_test::function_suffix() << - "(img, " << sampler << ", coords[gid]." << image_test::coord_accessor() << R"(); - } - )"; - - return s.str(); - } -#else - std::string generate_source() - { - std::stringstream s; - s << R"( - #include - #include - #include - #include - using namespace cl; - )"; - - s << R"( - typedef )" << type_name() << R"( element_type; - )"; - - std::string sampler; - if (source == sampler_source::program_scope) - { - s << R"( - sampler sampler_program_scope = make_sampler(); - )"; - sampler = "sampler_program_scope"; - } - else if (source == sampler_source::param) - { - sampler = "sampler_param"; - } - - s << R"( - kernel void test( - const )" << image_test::image_type_name() << R"( img, - const global_ptr coords, - global_ptr output, - sampler sampler_param - ) { - const ulong gid = get_global_linear_id(); - - output[gid] = img.sample()" << sampler << ", coords[gid]." 
<< image_test::coord_accessor() << R"(); - } - )"; - - return s.str(); - } -#endif - - int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) - { - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - using element_type = typename image_test::element_type; - using coord_type = cl_int4; - using scalar_element_type = typename scalar_type::type; - using channel_type = typename image_test::channel_type; - - cl_image_format image_format; - image_format.image_channel_order = channel_order; - image_format.image_channel_data_type = ChannelType; - - const size_t pixel_size = get_pixel_size(&image_format); - const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); - - cl_image_desc image_desc; - image_desc.image_type = ImageType; - if (ImageType == CL_MEM_OBJECT_IMAGE1D) - { - image_desc.image_width = 2048; - image_desc.image_height = 1; - image_desc.image_depth = 1; - } - else if (ImageType == 
CL_MEM_OBJECT_IMAGE2D) - { - image_desc.image_width = 256; - image_desc.image_height = 256; - image_desc.image_depth = 1; - } - else if (ImageType == CL_MEM_OBJECT_IMAGE3D) - { - image_desc.image_width = 64; - image_desc.image_height = 64; - image_desc.image_depth = 64; - } - image_desc.image_array_size = 0; - image_desc.image_row_pitch = image_desc.image_width * pixel_size; - image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; - image_desc.num_mip_levels = 0; - image_desc.num_samples = 0; - image_desc.mem_object = NULL; - - image_descriptor image_info = create_image_descriptor(image_desc, &image_format); - - std::vector image_values = generate_input( - image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, - image_test::channel_min(), image_test::channel_max(), - std::vector() - ); - - const size_t count = num_elements; - - std::vector coords = generate_input( - count, - detail::make_value(0), - coord_type { - static_cast(image_desc.image_width - 1), - static_cast(image_desc.image_height - 1), - static_cast(image_desc.image_depth - 1), - 0 - }, - std::vector() - ); - - cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &image_format, &image_desc, static_cast(image_values.data()), &error); - RETURN_ON_CL_ERROR(error, "clCreateImage") - - cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(coord_type) * count, static_cast(coords.data()), &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(element_type) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - const cl_sampler_properties sampler_properties[] = { - CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, - CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, - CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, - 0 - }; - cl_sampler sampler = clCreateSamplerWithProperties(context, sampler_properties, 
&error); - RETURN_ON_CL_ERROR(error, "clCreateSamplerWithProperties") - - error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 3, sizeof(sampler), &sampler); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - const size_t global_size = count; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(element_type) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - for (size_t i = 0; i < count; i++) - { - const coord_type c = coords[i]; - const element_type result = output[i]; - - element_type expected; - read_image_pixel(static_cast(image_values.data()), &image_info, - c.s[0], c.s[1], c.s[2], - expected.s); - - if (!are_equal(result, expected)) - { - RETURN_ON_ERROR_MSG(-1, - "Sampling from coordinates %s failed. 
Expected: %s, got: %s", - format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() - ); - } - } - - clReleaseMemObject(img); - clReleaseMemObject(coords_buffer); - clReleaseMemObject(output_buffer); - clReleaseSampler(sampler); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; - } -}; - -template -int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - if (!is_test_supported(device)) - return CL_SUCCESS; - - int error = CL_SUCCESS; - - for (auto channel_order : get_channel_orders(device)) - for (auto source : sampler_sources) - { - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order, source) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - } - - return error; -} - - -AUTO_TEST_CASE(test_images_sample_1d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_sample_2d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_sample_3d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - 
return run_test_cases(device, context, queue, num_elements); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP diff --git a/test_conformance/clcpp/images/test_write.hpp b/test_conformance/clcpp/images/test_write.hpp deleted file mode 100644 index 0f54487456..0000000000 --- a/test_conformance/clcpp/images/test_write.hpp +++ /dev/null @@ -1,327 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP -#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP - -#include -#include -#include -#include -#include - -#include "common.hpp" - - -namespace test_images_write { - -template -struct image_test : image_test_base -{ - cl_channel_order channel_order; - - image_test(cl_channel_order channel_order) : - channel_order(channel_order) - { } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string generate_source() - { - std::stringstream s; - s << R"( - typedef )" << type_name() << R"( element_type; - - kernel void test( - write_only )" << image_test::image_type_name() << R"(_t img, - const global int4 *coords, - const global element_type *input - ) { - const 
ulong gid = get_global_linear_id(); - - write_image)" << image_test::function_suffix() << - "(img, coords[gid]." << image_test::coord_accessor() << R"(, input[gid]); - } - )"; - - return s.str(); - } -#else - std::string generate_source() - { - std::stringstream s; - s << R"( - #include - #include - #include - #include - using namespace cl; - )"; - - s << R"( - typedef )" << type_name() << R"( element_type; - - kernel void test( - )" << image_test::image_type_name() << R"( img, - const global_ptr coords, - const global_ptr input - ) { - const ulong gid = get_global_linear_id(); - - img.write(coords[gid].)" << image_test::coord_accessor() << R"(, input[gid]); - } - )"; - - return s.str(); - } -#endif - - int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) - { - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - using element_type = typename image_test::element_type; - using coord_type = cl_int4; - using scalar_element_type = 
typename scalar_type::type; - using channel_type = typename image_test::channel_type; - - cl_image_format image_format; - image_format.image_channel_order = channel_order; - image_format.image_channel_data_type = ChannelType; - - const size_t pixel_size = get_pixel_size(&image_format); - const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); - - cl_image_desc image_desc; - image_desc.image_type = ImageType; - if (ImageType == CL_MEM_OBJECT_IMAGE1D) - { - image_desc.image_width = 2048; - image_desc.image_height = 1; - image_desc.image_depth = 1; - } - else if (ImageType == CL_MEM_OBJECT_IMAGE2D) - { - image_desc.image_width = 256; - image_desc.image_height = 256; - image_desc.image_depth = 1; - } - else if (ImageType == CL_MEM_OBJECT_IMAGE3D) - { - image_desc.image_width = 64; - image_desc.image_height = 64; - image_desc.image_depth = 64; - } - image_desc.image_array_size = 0; - image_desc.image_row_pitch = image_desc.image_width * pixel_size; - image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; - image_desc.num_mip_levels = 0; - image_desc.num_samples = 0; - image_desc.mem_object = NULL; - - image_descriptor image_info = create_image_descriptor(image_desc, &image_format); - - std::vector random_image_values = generate_input( - image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, - image_test::channel_min(), image_test::channel_max(), - std::vector() - ); - - const size_t count = num_elements; - - std::vector coords = generate_input( - count, - detail::make_value(0), - coord_type { - static_cast(image_desc.image_width - 1), - static_cast(image_desc.image_height - 1), - static_cast(image_desc.image_depth - 1), - 0 - }, - std::vector() - ); - - std::vector input(count); - for (size_t i = 0; i < count; i++) - { - const coord_type c = coords[i]; - - // Use read_image_pixel from harness/imageHelpers to fill input values - // (it will deal with correct 
channels, orders etc.) - read_image_pixel(static_cast(random_image_values.data()), &image_info, - c.s[0], c.s[1], c.s[2], - input[i].s); - } - - // image_row_pitch and image_slice_pitch must be 0, when clCreateImage is used with host_ptr = NULL - image_desc.image_row_pitch = 0; - image_desc.image_slice_pitch = 0; - cl_mem img = clCreateImage(context, CL_MEM_WRITE_ONLY, - &image_format, &image_desc, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateImage") - - cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(coord_type) * count, static_cast(coords.data()), &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - cl_mem input_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(element_type) * count, static_cast(input.data()), &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(input_buffer), &input_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - const size_t global_size = count; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector image_values(image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count); - - const size_t origin[3] = { 0 }; - const size_t region[3] = { image_desc.image_width, image_desc.image_height, image_desc.image_depth }; - error = clEnqueueReadImage( - queue, img, CL_TRUE, - origin, region, 0, 0, - static_cast(image_values.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - for (size_t i = 0; i < count; i++) - { - const coord_type c = coords[i]; - const element_type expected = input[i]; - - element_type result; - 
read_image_pixel(static_cast(image_values.data()), &image_info, - c.s[0], c.s[1], c.s[2], - result.s); - - if (!are_equal(result, expected)) - { - RETURN_ON_ERROR_MSG(-1, - "Writing to coordinates %s failed. Expected: %s, got: %s", - format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() - ); - } - } - - clReleaseMemObject(img); - clReleaseMemObject(coords_buffer); - clReleaseMemObject(input_buffer); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; - } -}; - -template -int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - if (!is_test_supported(device)) - return CL_SUCCESS; - - int error = CL_SUCCESS; - - for (auto channel_order : get_channel_orders(device)) - { - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - - error = image_test(channel_order) - .run(device, context, queue, num_elements); - RETURN_ON_ERROR(error) - } - - return error; -} - - -AUTO_TEST_CASE(test_images_write_1d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_write_2d) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -AUTO_TEST_CASE(test_images_write_3d) 
-(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - return run_test_cases(device, context, queue, num_elements); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP diff --git a/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp deleted file mode 100644 index 98da450b43..0000000000 --- a/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp +++ /dev/null @@ -1,142 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP -#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP - -#include "common.hpp" -#include - -template -struct int_func_mad24 : public ternary_func -{ - std::string str() - { - return "mad24"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "All types must be the same" - ); - static_assert( - std::is_same::value || std::is_same::value, - "Function takes only signed/unsigned integers." 
- ); - return (x * y) + z; - } - - IN1 min1() - { - return 0; - } - - IN1 max1() - { - return (std::numeric_limits::max)() & IN1(0x00FFFF); - } - - IN2 min2() - { - return 0; - } - - IN2 max2() - { - return (std::numeric_limits::max)() & IN2(0x00FFFF); - } -}; - -template -struct int_func_mul24 : public binary_func -{ - std::string str() - { - return "mul24"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value - && std::is_same::value, - "All types must be the same" - ); - static_assert( - std::is_same::value || std::is_same::value, - "Function takes only signed/unsigned integers." - ); - return x * y; - } - - IN1 min1() - { - return 0; - } - - IN1 max1() - { - return (std::numeric_limits::max)() & IN1(0x00FFFF); - } - - IN2 min2() - { - return 0; - } - - IN2 max2() - { - return (std::numeric_limits::max)() & IN2(0x00FFFF); - } -}; - -AUTO_TEST_CASE(test_int_24bit_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // intn mad24(intn x, intn y, intn z); - // uintn mad24(uintn x, uintn y, uintn z); - TEST_TERNARY_FUNC_MACRO((int_func_mad24())) - TEST_TERNARY_FUNC_MACRO((int_func_mad24())) - - // intn mul24(intn x, intn y); - // uintn mul24(uintn x, uintn y); - TEST_BINARY_FUNC_MACRO((int_func_mul24())) - TEST_BINARY_FUNC_MACRO((int_func_mul24())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP diff --git a/test_conformance/clcpp/integer_funcs/CMakeLists.txt b/test_conformance/clcpp/integer_funcs/CMakeLists.txt deleted file mode 100644 index ba4cfe865d..0000000000 --- a/test_conformance/clcpp/integer_funcs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_INTEGER_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git 
a/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp deleted file mode 100644 index 13ca1563d0..0000000000 --- a/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp +++ /dev/null @@ -1,232 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP -#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP - -#include "common.hpp" -#include - -template -struct int_func_popcount : public unary_func -{ - std::string str() - { - return "popcount"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(IN1 x) - { - OUT1 count = 0; - for (count = 0; x != 0; count++) - { - x &= x - 1; - } - return count; - } -}; - -template -struct int_func_clz : public unary_func -{ - std::string str() - { - return "clz"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(IN1 x) - { - OUT1 count = 0; - if(std::is_unsigned::value) - { - cl_ulong value = x; - value <<= 8 * sizeof(value) - (8 * sizeof(x)); - for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) - { - value <<= 1; - } - } - else - { - cl_long value = x; - value <<= 8 * sizeof(value) - (8 * sizeof(x)); - for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) - { - value <<= 1; - } - } - return count; - } -}; - -template -struct int_func_ctz : public unary_func -{ - std::string str() - { - 
return "ctz"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(IN1 x) - { - if(x == 0) - return sizeof(x); - - OUT1 count = 0; - IN1 value = x; - for(count = 0; 0 == (value & 0x1); count++) - { - value >>= 1; - } - return count; - } -}; - -template -struct int_func_rotate : public binary_func -{ - std::string str() - { - return "rotate"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(IN1 value, IN2 shift) - { - static_assert( - std::is_unsigned::value, - "Only unsigned integers are supported" - ); - if ((shift &= sizeof(value)*8 - 1) == 0) - return value; - return (value << shift) | (value >> (sizeof(value)*8 - shift)); - } - - IN2 min2() - { - return 0; - } - - IN2 max2() - { - return sizeof(IN1) * 8; - } -}; - -template -struct int_func_upsample : public binary_func -{ - std::string str() - { - return "upsample"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(IN1 hi, IN2 lo) - { - static_assert( - sizeof(IN1) == sizeof(IN2), - "sizeof(IN1) != sizeof(IN2)" - ); - static_assert( - sizeof(OUT1) == 2 * sizeof(IN1), - "sizeof(OUT1) != 2 * sizeof(IN1)" - ); - static_assert( - std::is_unsigned::value, - "IN2 type must be unsigned" - ); - return (static_cast(hi) << (8*sizeof(IN1))) | lo; - } - - IN2 min2() - { - return 0; - } - - IN2 max2() - { - return sizeof(IN1) * 8; - } -}; - -AUTO_TEST_CASE(test_int_bitwise_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - TEST_UNARY_FUNC_MACRO((int_func_popcount())) - TEST_UNARY_FUNC_MACRO((int_func_popcount())) - TEST_UNARY_FUNC_MACRO((int_func_popcount())) - TEST_UNARY_FUNC_MACRO((int_func_popcount())) - - TEST_UNARY_FUNC_MACRO((int_func_clz())) - TEST_UNARY_FUNC_MACRO((int_func_clz())) - TEST_UNARY_FUNC_MACRO((int_func_clz())) - TEST_UNARY_FUNC_MACRO((int_func_clz())) - - TEST_UNARY_FUNC_MACRO((int_func_ctz())) - 
TEST_UNARY_FUNC_MACRO((int_func_ctz())) - TEST_UNARY_FUNC_MACRO((int_func_ctz())) - TEST_UNARY_FUNC_MACRO((int_func_ctz())) - - TEST_BINARY_FUNC_MACRO((int_func_rotate())) - TEST_BINARY_FUNC_MACRO((int_func_rotate())) - - // shortn upsample(charn hi, ucharn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - // ushortn upsample(ucharn hi, ucharn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - // intn upsample(shortn hi, ushortn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - // uintn upsample(ushortn hi, ushortn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - // longn upsample(intn hi, uintn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - // ulongn upsample(uintn hi, uintn lo); - TEST_BINARY_FUNC_MACRO((int_func_upsample())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP diff --git a/test_conformance/clcpp/integer_funcs/common.hpp b/test_conformance/clcpp/integer_funcs/common.hpp deleted file mode 100644 index f04811e145..0000000000 --- a/test_conformance/clcpp/integer_funcs/common.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP - -#include -#include -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP diff --git a/test_conformance/clcpp/integer_funcs/main.cpp b/test_conformance/clcpp/integer_funcs/main.cpp deleted file mode 100644 index c6cdfb616f..0000000000 --- a/test_conformance/clcpp/integer_funcs/main.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "bitwise_funcs.hpp" -#include "numeric_funcs.hpp" -#include "24bit_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp deleted file mode 100644 index 21d75c5acd..0000000000 --- a/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp +++ /dev/null @@ -1,703 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP -#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP - -#include "common.hpp" -#include - -template -struct int_func_abs : public unary_func -{ - std::string str() - { - return "abs"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x) - { - static_assert( - std::is_unsigned::value, - "OUT1 type must be unsigned" - ); - if(x < IN1(0)) - return static_cast(-x); - return static_cast(x); - } -}; - -template -struct int_func_abs_diff : public binary_func -{ - std::string str() - { - return "abs_diff"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 must be IN2" - ); - static_assert( - std::is_unsigned::value, - "OUT1 type must be unsigned" - ); - if(x < y) - return static_cast(y-x); - return static_cast(x-y); - } -}; - -template -struct int_func_add_sat : public binary_func -{ - std::string str() - { - return "add_sat"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 must be IN2" - ); - static_assert( - std::is_same::value, - "OUT1 must be IN2" - ); - // sat unsigned integers - if(std::is_unsigned::value) - { - OUT1 z = x + y; - if(z < x || z < y) - return (std::numeric_limits::max)(); - return z; - } - // sat signed integers - OUT1 z = x + y; - if(y > 0) - { - if(z < x) - return (std::numeric_limits::max)(); - } - else - 
{ - if(z > x) - return (std::numeric_limits::min)(); - } - return z; - } -}; - -template -struct int_func_hadd : public binary_func -{ - std::string str() - { - return "hadd"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 must be IN2" - ); - static_assert( - std::is_same::value, - "OUT1 must be IN2" - ); - return (x >> OUT1(1)) + (y >> OUT1(1)) + (x & y & OUT1(1)); - } -}; - -template -struct int_func_rhadd : public binary_func -{ - std::string str() - { - return "rhadd"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 must be IN2" - ); - static_assert( - std::is_same::value, - "OUT1 must be IN2" - ); - return (x >> OUT1(1)) + (y >> OUT1(1)) + ((x | y) & OUT1(1)); - } -}; - -// clamp for scalars -template -struct int_func_clamp : public ternary_func -{ - std::string str() - { - return "clamp"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) - { - static_assert( - std::is_same::value, - "IN3 must be IN2" - ); - static_assert( - std::is_same::value, - "OUT1 must be IN1" - ); - return (std::min)((std::max)(x, minval), maxval); - } - - IN2 min2() - { - return (std::numeric_limits::min)(); - } - - IN2 max2() - { - return (std::numeric_limits::max)() / IN2(2); - } - - IN3 min3() - { - return IN3(1) + ((std::numeric_limits::max)() / IN3(2)); - } - - IN3 max3() - { - return (std::numeric_limits::max)(); - } -}; - -// gentype clamp(gentype x, scalar minval, scalar maxval); -template -struct int_func_clamp::value>::type> : public ternary_func -{ - std::string str() - { - return "clamp"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) - { - static_assert( - std::is_same::value, - 
"IN3 must be IN2" - ); - static_assert( - !is_vector_type::value && !is_vector_type::value, - "IN3 and IN2 must be scalar" - ); - static_assert( - std::is_same::value, - "OUT1 must be IN1" - ); - OUT1 result; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = (std::min)((std::max)(x.s[i], minval), maxval); - } - return result; - } - - IN1 min1() - { - typedef typename scalar_type::type SCALAR1; - IN1 min1; - for(size_t i = 0; i < vector_size::value; i++) - { - min1.s[i] = (std::numeric_limits::min)(); - } - return min1; - } - - IN1 max1() - { - typedef typename scalar_type::type SCALAR1; - IN1 max1; - for(size_t i = 0; i < vector_size::value; i++) - { - max1.s[i] = (std::numeric_limits::max)(); - } - return max1; - } - - IN2 min2() - { - return (std::numeric_limits::min)(); - } - - IN2 max2() - { - return (std::numeric_limits::max)() / IN2(2); - } - - IN3 min3() - { - return IN3(1) + ((std::numeric_limits::max)() / IN3(2)); - } - - IN3 max3() - { - return (std::numeric_limits::max)(); - } -}; - -template -struct int_func_mul_hi : public binary_func -{ - std::string str() - { - return "mul_hi"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value - && std::is_same::value, - "Types must be the same" - ); - static_assert( - !std::is_same::value && !std::is_same::value, - "Operation unimplemented for 64-bit scalars" - ); - cl_long xl = static_cast(x); - cl_long yl = static_cast(y); - return static_cast((xl * yl) >> (8 * sizeof(OUT1))); - } -}; - -template -struct int_func_mad_hi : public ternary_func -{ - std::string str() - { - return "mad_hi"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "Types must be the same" - ); - return int_func_mul_hi()(x, y) + z; - } -}; - -// This test 
is implemented only for unsigned integers -template -struct int_func_mad_sat : public ternary_func -{ - std::string str() - { - return "mad_sat"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) - { - static_assert( - std::is_same::value - && std::is_same::value - && std::is_same::value, - "Types must be the same" - ); - static_assert( - std::is_unsigned::value, - "Test operation is not implemented for signed integers" - ); - // mad_sat unsigned integers - OUT1 w1 = (x * y); - if (x != 0 && w1 / x != y) - return (std::numeric_limits::max)(); - OUT1 w2 = w1 + z; - if(w2 < w1) - return (std::numeric_limits::max)(); - return w2; - } -}; - -template -struct int_func_sub_sat : public binary_func -{ - std::string str() - { - return "sub_sat"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value && std::is_same::value, - "IN1, IN2 and OUT1 must be the same types" - ); - // sat unsigned integers - if(std::is_unsigned::value) - { - OUT1 z = x - y; - if(x < y) - return (std::numeric_limits::min)(); - return z; - } - // sat signed integers - OUT1 z = x - y; - if(y < 0) - { - if(z < x) - return (std::numeric_limits::max)(); - } - else - { - if(z > x) - return (std::numeric_limits::min)(); - } - return z; - } -}; - -template -struct int_func_max : public binary_func -{ - std::string str() - { - return "max"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value && std::is_same::value, - "IN1, IN2 and OUT1 must be the same types" - ); - return (std::max)(x, y); - } -}; - -template -struct int_func_max::value>::type> : public binary_func -{ - std::string str() - { - return "max"; - } - - std::string headers() - { - return "#include \n"; - } - - IN1 min1() - { - typedef typename scalar_type::type SCALAR1; - IN1 
min1; - for(size_t i = 0; i < vector_size::value; i++) - { - min1.s[i] = (std::numeric_limits::min)(); - } - return min1; - } - - IN1 max1() - { - typedef typename scalar_type::type SCALAR1; - IN1 max1; - for(size_t i = 0; i < vector_size::value; i++) - { - max1.s[i] = (std::numeric_limits::max)(); - } - return max1; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 and OUT1 must be the same types" - ); - static_assert( - !is_vector_type::value, - "IN2 must be scalar" - ); - static_assert( - std::is_same::type, IN2>::value, - "IN2 must match with OUT1 and IN1" - ); - IN1 result = x; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = (std::max)(x.s[i], y); - } - return result; - } -}; - -template -struct int_func_min : public binary_func -{ - std::string str() - { - return "min"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value && std::is_same::value, - "IN1, IN2 and OUT1 must be the same types" - ); - return (std::min)(x, y); - } -}; - -template -struct int_func_min::value>::type> : public binary_func -{ - std::string str() - { - return "min"; - } - - std::string headers() - { - return "#include \n"; - } - - IN1 min1() - { - typedef typename scalar_type::type SCALAR1; - IN1 min1; - for(size_t i = 0; i < vector_size::value; i++) - { - min1.s[i] = (std::numeric_limits::min)(); - } - return min1; - } - - IN1 max1() - { - typedef typename scalar_type::type SCALAR1; - IN1 max1; - for(size_t i = 0; i < vector_size::value; i++) - { - max1.s[i] = (std::numeric_limits::max)(); - } - return max1; - } - - OUT1 operator()(const IN1& x, const IN2& y) - { - static_assert( - std::is_same::value, - "IN1 and OUT1 must be the same types" - ); - static_assert( - !is_vector_type::value, - "IN2 must be scalar" - ); - static_assert( - std::is_same::type, IN2>::value, - "IN2 must match with OUT1 and IN1" - ); - IN1 
result = x; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = (std::min)(x.s[i], y); - } - return result; - } -}; - -AUTO_TEST_CASE(test_int_numeric_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // ugentype abs(gentype x); - TEST_UNARY_FUNC_MACRO((int_func_abs())) - TEST_UNARY_FUNC_MACRO((int_func_abs())) - TEST_UNARY_FUNC_MACRO((int_func_abs())) - TEST_UNARY_FUNC_MACRO((int_func_abs())) - - // ugentype abs_diff(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) - TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) - TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) - TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) - - // gentype add_sat(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_add_sat())) - TEST_BINARY_FUNC_MACRO((int_func_add_sat())) - TEST_BINARY_FUNC_MACRO((int_func_add_sat())) - TEST_BINARY_FUNC_MACRO((int_func_add_sat())) - - // gentype hadd(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_hadd())) - TEST_BINARY_FUNC_MACRO((int_func_hadd())) - TEST_BINARY_FUNC_MACRO((int_func_hadd())) - TEST_BINARY_FUNC_MACRO((int_func_hadd())) - - // gentype rhadd(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_rhadd())) - TEST_BINARY_FUNC_MACRO((int_func_rhadd())) - TEST_BINARY_FUNC_MACRO((int_func_rhadd())) - TEST_BINARY_FUNC_MACRO((int_func_rhadd())) - - // gentype clamp(gentype x, gentype minval, gentype maxval); - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - - // gentype clamp(gentype x, scalar minval, scalar maxval); - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - TEST_TERNARY_FUNC_MACRO((int_func_clamp())) - - // gentype mad_hi(gentype a, gentype b, gentype c); - 
TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) - TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) - TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) - TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) - - // gentype mad_sat(gentype a, gentype b, gentype c); - TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) - TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) - TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) - - // gentype max(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - - // gentype max(gentype x, scalar y); - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - TEST_BINARY_FUNC_MACRO((int_func_max())) - - // gentype min(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - - // gentype min(gentype x, scalar y); - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - TEST_BINARY_FUNC_MACRO((int_func_min())) - - // gentype mul_hi(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) - TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) - TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) - TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) - - // gentype sub_sat(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) - TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) - TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) - TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP diff --git a/test_conformance/clcpp/math_funcs/CMakeLists.txt b/test_conformance/clcpp/math_funcs/CMakeLists.txt deleted file mode 100644 index 
c3b56c1717..0000000000 --- a/test_conformance/clcpp/math_funcs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_MATH_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/math_funcs/common.hpp b/test_conformance/clcpp/math_funcs/common.hpp deleted file mode 100644 index 32249056d0..0000000000 --- a/test_conformance/clcpp/math_funcs/common.hpp +++ /dev/null @@ -1,347 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP - -#include -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include "reference.hpp" - -#ifndef MATH_FUNCS_CLASS_NAME - #define MATH_FUNCS_CLASS_NAME(x, y) x ## _func_ ## y -#endif - -#define MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, OCL_FUNC, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \ -struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public unary_func \ -{ \ - MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ - { \ - \ - } \ - \ - std::string str() \ - { \ - return #OCL_FUNC; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - /* Reference value type is cl_double */ \ - cl_double operator()(const cl_float& x) \ - { \ - return (HOST_FUNC)(static_cast(x)); \ - } \ - \ - cl_float min1() \ - { \ - return MIN1; \ - } \ - \ - cl_float max1() \ - { \ - return MAX1; \ - } \ - \ - std::vector in1_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - bool use_ulp() \ - { \ - return USE_ULP; \ - } \ - \ - template \ - typename make_vector_type::value>::type \ - delta(const cl_float& in1, const T& expected) \ - { \ - typedef \ - typename make_vector_type::value>::type \ - delta_vector_type; \ - (void) in1; \ - auto e = detail::make_value(DELTA); \ - return detail::multiply(e, expected); \ - } \ - \ - float ulp() \ - { \ - if(m_is_embedded) \ - { \ - return ULP_EMBEDDED; \ - } \ - return ULP; \ - } \ -private: \ - bool m_is_embedded; \ -}; - -#define MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \ -struct 
MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public binary_func \ -{ \ - MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ - { \ - \ - } \ - \ - std::string str() \ - { \ - return #OCL_NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - \ - cl_float operator()(const cl_float& x, const cl_float& y) \ - { \ - return (HOST_FUNC)(x, y); \ - } \ - \ - cl_float min1() \ - { \ - return MIN1; \ - } \ - \ - cl_float max1() \ - { \ - return MAX1; \ - } \ - \ - cl_float min2() \ - { \ - return MIN2; \ - } \ - \ - cl_float max2() \ - { \ - return MAX2; \ - } \ - \ - std::vector in1_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - std::vector in2_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - template \ - typename make_vector_type::value>::type \ - delta(const cl_float& in1, const cl_float& in2, const T& expected) \ - { \ - typedef \ - typename make_vector_type::value>::type \ - delta_vector_type; \ - (void) in1; \ - (void) in2; \ - auto e = detail::make_value(DELTA); \ - return detail::multiply(e, expected); \ - } \ - \ - bool use_ulp() \ - { \ - return USE_ULP; \ - } \ - \ - float ulp() \ - { \ - if(m_is_embedded) \ - { \ - return ULP_EMBEDDED; \ - } \ - return ULP; \ - } \ -private: \ - bool m_is_embedded; \ -}; - -#define MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \ -struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public 
ternary_func \ -{ \ - MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ - { \ - \ - } \ - \ - std::string str() \ - { \ - return #OCL_NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - \ - cl_double operator()(const cl_float& x, const cl_float& y, const cl_float& z) \ - { \ - return (HOST_FUNC)(static_cast(x), static_cast(y), static_cast(z)); \ - } \ - \ - cl_float min1() \ - { \ - return MIN1; \ - } \ - \ - cl_float max1() \ - { \ - return MAX1; \ - } \ - \ - cl_float min2() \ - { \ - return MIN2; \ - } \ - \ - cl_float max2() \ - { \ - return MAX2; \ - } \ - \ - cl_float min3() \ - { \ - return MIN3; \ - } \ - \ - cl_float max3() \ - { \ - return MAX3; \ - } \ - \ - std::vector in1_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - std::vector in2_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - std::vector in3_special_cases() \ - { \ - return { \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - cl_float(1.0f), \ - cl_float(-1.0f), \ - cl_float(2.0f), \ - cl_float(-2.0f), \ - std::numeric_limits::infinity(), \ - -std::numeric_limits::infinity(), \ - std::numeric_limits::quiet_NaN() \ - }; \ - } \ - \ - template \ - typename make_vector_type::value>::type \ - delta(const cl_float& in1, const cl_float& in2, const cl_float& in3, const T& expected) \ - { \ - typedef \ - typename make_vector_type::value>::type \ - delta_vector_type; \ - (void) in1; \ - (void) in2; \ - (void) in3; \ - auto e = detail::make_value(DELTA); \ - return 
detail::multiply(e, expected); \ - } \ - \ - bool use_ulp() \ - { \ - return USE_ULP; \ - } \ - \ - float ulp() \ - { \ - if(m_is_embedded) \ - { \ - return ULP_EMBEDDED; \ - } \ - return ULP; \ - } \ -private: \ - bool m_is_embedded; \ -}; - -#define MATH_FUNCS_DEFINE_UNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \ - MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) -#define MATH_FUNCS_DEFINE_BINARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \ - MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) -#define MATH_FUNCS_DEFINE_TERNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \ - MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/comparison_funcs.hpp b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp deleted file mode 100644 index 0bd6ff9196..0000000000 --- a/test_conformance/clcpp/math_funcs/comparison_funcs.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fdim, std::fdim, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmax, std::fmax, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmin, std::fmin, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, maxmag, reference::maxmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, minmag, reference::minmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) - -// comparison functions -AUTO_TEST_CASE(test_comparison_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - TEST_BINARY_FUNC_MACRO((comparison_func_fdim(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((comparison_func_fmax(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((comparison_func_fmin(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((comparison_func_maxmag(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((comparison_func_minmag(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP diff --git 
a/test_conformance/clcpp/math_funcs/exponential_funcs.hpp b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp deleted file mode 100644 index 82a8247a4e..0000000000 --- a/test_conformance/clcpp/math_funcs/exponential_funcs.hpp +++ /dev/null @@ -1,139 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp, std::exp, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, expm1, std::expm1, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp2, std::exp2, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp10, reference::exp10, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) - -struct exponential_func_ldexp : public binary_func -{ - exponential_func_ldexp(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "ldexp"; - } - - std::string headers() - { - return "#include \n"; - } - - /* Reference value type is cl_double */ - cl_double operator()(const cl_float& x, const cl_int& y) - { - return (std::ldexp)(static_cast(x), y); - } - - cl_float min1() - { - 
return -1000.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - cl_int min2() - { - return -8; - } - - cl_int max2() - { - return 8; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - -// exponential functions -AUTO_TEST_CASE(test_exponential_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // auto exp(gentype x); - // auto expm1(gentype x); - // auto exp2(gentype x); - // auto exp10(gentype x); - TEST_UNARY_FUNC_MACRO((exponential_func_exp(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((exponential_func_expm1(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((exponential_func_exp2(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((exponential_func_exp10(is_embedded_profile))) - - // auto ldexp(gentype x, intn k); - TEST_BINARY_FUNC_MACRO((exponential_func_ldexp(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp deleted file mode 100644 index 63b4c23047..0000000000 --- a/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp 
+++ /dev/null @@ -1,733 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -// -------------- UNARY FUNCTIONS - -// gentype ceil(gentype x); -// gentype floor(gentype x); -// gentype rint(gentype x); -// gentype round(gentype x); -// gentype trunc(gentype x); -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(fp, ceil, std::ceil, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(fp, floor, std::floor, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(fp, rint, std::rint, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(fp, round, std::round, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(fp, trunc, std::trunc, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) - -// floatn nan(uintn nancode); -struct fp_func_nan : public unary_func -{ - std::string str() - { - return "nan"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_float operator()(const cl_uint& x) - { - cl_uint r = x | 0x7fc00000U; - // cl_float and cl_int have the same size so that's correct - cl_float rf = *reinterpret_cast(&r); - return rf; - } - - cl_uint min1() - { - return 0; - } - - 
cl_uint max1() - { - return 100; - } - - std::vector in1_special_cases() - { - return { - 0, 1 - }; - } -}; - -// -------------- UNARY FUNCTIONS, 2ND ARG IS POINTER - -// gentype fract(gentype x, gentype* iptr); -// -// Fuction fract() returns additional value via pointer (2nd argument). In order to test -// if it's correct output buffer type is cl_float2. In first compontent we store what -// fract() function returns, and in the 2nd component we store what is returned via its -// 2nd argument (gentype* iptr). -struct fp_func_fract : public unary_func -{ - fp_func_fract(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "fract"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_double2 operator()(const cl_float& x) - { - return reference::fract(static_cast(x)); - } - - cl_float min1() - { - return -1000.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - -// We need to specialize generate_kernel_unary<>() function template for fp_func_fract. 
-// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_unary(fp_func_fract func) -{ - return - "__kernel void test_fract(global float *input, global float2 *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " float itpr = 0;\n" - " result.x = fract(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_unary(fp_func_fract func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_fract(global_ptr input, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " float itpr = 0;\n" - " result.x = fract(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#endif - -// gentype modf(gentype x, gentype* iptr); -// -// Fuction modf() returns additional value via pointer (2nd argument). In order to test -// if it's correct output buffer type is cl_float2. In first compontent we store what -// modf() function returns, and in the 2nd component we store what is returned via its -// 2nd argument (gentype* iptr). 
-struct fp_func_modf : public unary_func -{ - fp_func_modf(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "modf"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_double2 operator()(const cl_float& x) - { - cl_double2 r; - r.s[0] = (std::modf)(static_cast(x), &(r.s[1])); - return r; - } - - cl_float min1() - { - return -1000.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - -// We need to specialize generate_kernel_unary<>() function template for fp_func_modf. -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_unary(fp_func_modf func) -{ - return - "__kernel void test_modf(global float *input, global float2 *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " float itpr = 0;\n" - " result.x = modf(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_unary(fp_func_modf func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_modf(global_ptr input, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " float itpr = 0;\n" - " 
result.x = modf(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#endif - -// gentype frexp(gentype x, intn* exp); -// -// Fuction frexp() returns additional value via pointer (2nd argument). In order to test -// if it's correct output buffer type is cl_float2. In first compontent we store what -// modf() function returns, and in the 2nd component we store what is returned via its -// 2nd argument (intn* exp). -struct fp_func_frexp : public unary_func -{ - fp_func_frexp(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "frexp"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_double2 operator()(const cl_float& x) - { - cl_double2 r; - cl_int e; - r.s[0] = (std::frexp)(static_cast(x), &e); - r.s[1] = static_cast(e); - return r; - } - - cl_float min1() - { - return -1000.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - -// We need to specialize generate_kernel_unary<>() function template for fp_func_frexp. 
-// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_unary(fp_func_frexp func) -{ - return - "__kernel void test_frexp(global float *input, global float2 *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " int itpr = 0;\n" - " result.x = frexp(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_unary(fp_func_frexp func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_frexp(global_ptr input, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " int itpr = 0;\n" - " result.x = frexp(input[gid], &itpr);\n" - " result.y = itpr;\n" - " output[gid] = result;\n" - "}\n"; -} -#endif - -// -------------- BINARY FUNCTIONS - -// gentype copysign(gentype x, gentype y); -// gentype fmod(gentype x, gentype y); -// gentype remainder(gentype x, gentype y); -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC(fp, copysign, std::copysign, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(fp, fmod, std::fmod, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(fp, remainder, std::remainder, true, 0.0f, 0.001f, 0.0f, -100.0f, 100.0f, -10.0f, 10.0f) - -// In case of function float nextafter(float, float) reference function must -// operate on floats and return float. 
-struct fp_func_nextafter : public binary_func -{ - fp_func_nextafter(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "nextafter"; - } - - std::string headers() - { - return "#include \n"; - } - - /* In this case reference value type MUST BE cl_float */ - cl_float operator()(const cl_float& x, const cl_float& y) - { - return (std::nextafter)(x, y); - } - - cl_float min1() - { - return -1000.0f; - } - - cl_float max1() - { - return 500.0f; - } - - cl_float min2() - { - return 501.0f; - } - - cl_float max2() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - std::vector in2_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - -// gentype remquo(gentype x, gentype y, intn* quo); -struct fp_func_remquo : public binary_func -{ - fp_func_remquo(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "remquo"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_double2 operator()(const cl_float& x, const cl_float& y) - { - return reference::remquo(static_cast(x), static_cast(y)); - } - - cl_float min1() - { - return -1000.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - cl_float min2() - { - return -1000.0f; - } - - cl_float max2() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - 
cl_float(1.0f), - cl_float(-1.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - std::vector in2_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 0.0f; - } - return 0.0f; - } -private: - bool m_is_embedded; -}; - - -// We need to specialize generate_kernel_binary<>() function template for fp_func_remquo. -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_binary(fp_func_remquo func) -{ - return - "__kernel void test_remquo(global float *input1, global float *input2, global float2 *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " int quo = 0;\n" - " int sign = 0;\n" - " result.x = remquo(input1[gid], input2[gid], &quo);\n" - // Specification say: - // "remquo also calculates the lower seven bits of the integral quotient x/y, - // and gives that value the same sign as x/y. It stores this signed value in - // the object pointed to by quo." - // Implemenation may save into quo more than seven bits. We need to take - // care of that here. - " sign = (quo < 0) ? -1 : 1;\n" - " quo = (quo < 0) ? -quo : quo;\n" - " quo &= 0x0000007f;\n" - " result.y = (sign < 0) ? 
-quo : quo;\n" - " output[gid] = result;\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_binary(fp_func_remquo func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_remquo(global_ptr input1, global_ptr input2, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 result;\n" - " int quo = 0;\n" - " int sign = 0;\n" - " result.x = remquo(input1[gid], input2[gid], &quo);\n" - // Specification say: - // "remquo also calculates the lower seven bits of the integral quotient x/y, - // and gives that value the same sign as x/y. It stores this signed value in - // the object pointed to by quo." - // Implemenation may save into quo more than seven bits. We need to take - // care of that here. - " sign = (quo < 0) ? -1 : 1;\n" - " quo = (quo < 0) ? -quo : quo;\n" - " quo &= 0x0000007f;\n" - " result.y = (sign < 0) ? -quo : quo;\n" - " output[gid] = result;\n" - "}\n"; -} -#endif - -// -------------- TERNARY FUNCTIONS - -// gentype fma(gentype a, gentype b, gentype c); -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3 -MATH_FUNCS_DEFINE_TERNARY_FUNC(fp, fma, std::fma, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) - -// floating point functions -AUTO_TEST_CASE(test_fp_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // gentype ceil(gentype x); - TEST_UNARY_FUNC_MACRO((fp_func_ceil(is_embedded_profile))) - // gentype 
floor(gentype x); - TEST_UNARY_FUNC_MACRO((fp_func_floor(is_embedded_profile))) - // gentype rint(gentype x); - TEST_UNARY_FUNC_MACRO((fp_func_rint(is_embedded_profile))) - // gentype round(gentype x); - TEST_UNARY_FUNC_MACRO((fp_func_round(is_embedded_profile))) - // gentype trunc(gentype x); - TEST_UNARY_FUNC_MACRO((fp_func_trunc(is_embedded_profile))) - - // floatn nan(uintn nancode); - TEST_UNARY_FUNC_MACRO((fp_func_nan())) - - // gentype fract(gentype x, gentype* iptr); - TEST_UNARY_FUNC_MACRO((fp_func_fract(is_embedded_profile))) - // gentype modf(gentype x, gentype* iptr); - TEST_UNARY_FUNC_MACRO((fp_func_modf(is_embedded_profile))) - // gentype frexp(gentype x, intn* exp); - TEST_UNARY_FUNC_MACRO((fp_func_frexp(is_embedded_profile))) - - // gentype remainder(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((fp_func_remainder(is_embedded_profile))) - // gentype copysign(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((fp_func_copysign(is_embedded_profile))) - // gentype fmod(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((fp_func_fmod(is_embedded_profile))) - - // gentype nextafter(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((fp_func_nextafter(is_embedded_profile))) - - // gentype remquo(gentype x, gentype y, intn* quo); - TEST_BINARY_FUNC_MACRO((fp_func_remquo(is_embedded_profile))) - - // gentype fma(gentype a, gentype b, gentype c); - TEST_TERNARY_FUNC_MACRO((fp_func_fma(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/half_math_funcs.hpp b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp deleted file mode 100644 index d72d717ce1..0000000000 --- a/test_conformance/clcpp/math_funcs/half_math_funcs.hpp +++ /dev/null @@ -1,106 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - 
-MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) -#else -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_math::cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_math::sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_math::tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_math::exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_math::exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_math::exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_math::log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_math::log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_math::log10, std::log10, true, 8192.0f, 
8192.0f, 0.1f, -1000.0f, 1000.0f) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_math::rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_math::sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_math::recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_math::divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_math::powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) -#endif - -// comparison functions -AUTO_TEST_CASE(test_half_math_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - TEST_UNARY_FUNC_MACRO((half_math_func_cos(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((half_math_func_sin(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((half_math_func_tan(is_embedded_profile))) - - TEST_UNARY_FUNC_MACRO((half_math_func_exp(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((half_math_func_exp2(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((half_math_func_exp10(is_embedded_profile))) - - TEST_UNARY_FUNC_MACRO((half_math_func_log(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((half_math_func_log2(is_embedded_profile))) - 
TEST_UNARY_FUNC_MACRO((half_math_func_log10(is_embedded_profile))) - - TEST_BINARY_FUNC_MACRO((half_math_func_divide(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp deleted file mode 100644 index 23e98302bc..0000000000 --- a/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp +++ /dev/null @@ -1,261 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -namespace detail -{ - -// This function reads values of FP_ILOGB0 and FP_ILOGBNAN macros defined on the device. -// OpenCL C++ Spec: -// The value of FP_ILOGB0 shall be either {INT_MIN} or {INT_MAX}. The value of FP_ILOGBNAN -// shall be either {INT_MAX} or {INT_MIN}. 
-int get_ilogb_nan_zero(cl_device_id device, cl_context context, cl_command_queue queue, cl_int& ilogb_nan, cl_int& ilogb_zero) -{ - cl_mem buffers[1]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - std::string code_str = - "__kernel void get_ilogb_nan_zero(__global int *out)\n" - "{\n" - " out[0] = FP_ILOGB0;\n" - " out[1] = FP_ILOGBNAN;\n" - "}\n"; - std::string kernel_name("get_ilogb_nan_zero"); - - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) - - std::vector output = generate_output(2); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - work_size[0] = 1; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - // Save - ilogb_zero = output[0]; - ilogb_nan = output[1]; - - clReleaseMemObject(buffers[0]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -} // namespace detail - -struct logarithmic_func_ilogb : public unary_func -{ - logarithmic_func_ilogb(cl_int ilogb_nan, cl_int ilogb_zero) - : m_ilogb_nan(ilogb_nan), m_ilogb_zero(ilogb_zero) - { - - } - - std::string str() - { - return "ilogb"; - } - - std::string headers() - { - return "#include \n"; - } - - cl_int operator()(const cl_float& x) - { - if((std::isnan)(x)) - { - return m_ilogb_nan; - } - else if(x == 0.0 || x == -0.0) - { - return m_ilogb_zero; - } - static_assert( - sizeof(cl_int) == sizeof(int), - "Tests assumes that sizeof(cl_int) == sizeof(int)" - ); - return 
(std::ilogb)(x); - } - - cl_float min1() - { - return -100.0f; - } - - cl_float max1() - { - return 1000.0f; - } - - std::vector in1_special_cases() - { - return { - cl_float(0.0f), - cl_float(-0.0f), - cl_float(1.0f), - cl_float(-1.0f), - cl_float(2.0f), - cl_float(-2.0f), - std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - std::numeric_limits::quiet_NaN() - }; - } -private: - cl_int m_ilogb_nan; - cl_int m_ilogb_zero; -}; - -// gentype log(gentype x); -// gentype logb(gentype x); -// gentype log2(gentype x); -// gentype log10(gentype x); -// gentype log1p(gentype x); -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log, std::log, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, logb, std::logb, true, 0.0f, 0.0f, 0.001f, -10.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log2, std::log2, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log10, std::log10, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log1p, std::log1p, true, 2.0f, 4.0f, 0.001f, -10.0f, 1000.0f) - -// gentype lgamma(gentype x); -// OpenCL C++ Spec.: -// The ULP values for built-in math functions lgamma and lgamma_r is currently undefined. -// Because of that we don't check ULP and set acceptable delta to 0.2f (20%). -MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f) - -// gentype lgamma_r(gentype x, intn* signp); -// OpenCL C++ Spec.: -// The ULP values for built-in math functions lgamma and lgamma_r is currently undefined. -// Because of that we don't check ULP and set acceptable delta to 0.2f (20%). -// -// Note: -// We DO NOT test if sign of the gamma function return by lgamma_r is correct. 
-MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma_r, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f) - -// We need to specialize generate_kernel_unary<>() function template for logarithmic_func_lgamma_r -// because it takes two arguments, but only one of it is input, the 2nd one is used to return -// the sign of the gamma function. -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_unary(logarithmic_func_lgamma_r func) -{ - return - "__kernel void test_lgamma_r(global float *input, global float *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " int sign;\n" - " output[gid] = lgamma_r(input[gid], &sign);\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_unary(logarithmic_func_lgamma_r func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_lgamma_r(global_ptr input, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " int sign;\n" - " output[gid] = lgamma_r(input[gid], &sign);\n" - "}\n"; -} -#endif - -// logarithmic functions -AUTO_TEST_CASE(test_logarithmic_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // Write values of FP_ILOGB0 and FP_ILOGBNAN, which are macros defined on the device, to - // ilogb_zero and 
ilogb_nan. - cl_int ilogb_nan = 0; - cl_int ilogb_zero = 0; - error = detail::get_ilogb_nan_zero(device, context, queue, ilogb_nan, ilogb_zero); - RETURN_ON_ERROR_MSG(error, "detail::get_ilogb_nan_zero function failed"); - - // intn ilogb(gentype x); - TEST_UNARY_FUNC_MACRO((logarithmic_func_ilogb(ilogb_nan, ilogb_zero))) - - // gentype log(gentype x); - // gentype logb(gentype x); - // gentype log2(gentype x); - // gentype log10(gentype x); - // gentype log1p(gentype x); - TEST_UNARY_FUNC_MACRO((logarithmic_func_log(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((logarithmic_func_logb(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((logarithmic_func_log2(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((logarithmic_func_log10(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((logarithmic_func_log1p(is_embedded_profile))) - - // gentype lgamma(gentype x); - TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma(is_embedded_profile))) - - // gentype lgamma(gentype x); - // - // Note: - // We DO NOT test if sign of the gamma function return by lgamma_r is correct - TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma_r(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/main.cpp b/test_conformance/clcpp/math_funcs/main.cpp deleted file mode 100644 index b51348712b..0000000000 --- a/test_conformance/clcpp/math_funcs/main.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include - -#include "../common.hpp" - -#include "comparison_funcs.hpp" -#include "exponential_funcs.hpp" -#include "floating_point_funcs.hpp" -#include "half_math_funcs.hpp" -#include "logarithmic_funcs.hpp" -#include "other_funcs.hpp" -#include "power_funcs.hpp" -#include "trigonometric_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - // Check if cl_float (float) and cl_double (double) fulfill the requirements of - // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." - ); - } - if(!std::numeric_limits::is_iec559) - { - RETURN_ON_ERROR_MSG(-1, - "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " - "Tests won't run correctly." - ); - } - - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/math_funcs/other_funcs.hpp b/test_conformance/clcpp/math_funcs/other_funcs.hpp deleted file mode 100644 index f939a5674a..0000000000 --- a/test_conformance/clcpp/math_funcs/other_funcs.hpp +++ /dev/null @@ -1,75 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(other, erfc, std::erfc, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(other, erf, std::erf, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(other, fabs, std::fabs, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(other, tgamma, std::tgamma, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC(other, hypot, std::hypot, true, 4.0f, 4.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3 -MATH_FUNCS_DEFINE_TERNARY_FUNC(other, mad, reference::mad, false, 0.0f, 0.0f, 0.1f, -10.0f, 10.0f, -10.0f, 10.0f, -10.0f, 10.0f) - -// other functions -AUTO_TEST_CASE(test_other_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - 
RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // gentype erf(gentype x); - // gentype erfc(gentype x); - TEST_UNARY_FUNC_MACRO((other_func_erfc(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((other_func_erf(is_embedded_profile))) - - // gentype fabs(gentype x); - TEST_UNARY_FUNC_MACRO((other_func_fabs(is_embedded_profile))) - - // gentype tgamma(gentype x); - TEST_UNARY_FUNC_MACRO((other_func_tgamma(is_embedded_profile))) - - // gentype hypot(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((other_func_hypot(is_embedded_profile))) - - // gentype mad(gentype a, gentype b, gentype c); - TEST_TERNARY_FUNC_MACRO((other_func_mad(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/power_funcs.hpp b/test_conformance/clcpp/math_funcs/power_funcs.hpp deleted file mode 100644 index 2ace9b357c..0000000000 --- a/test_conformance/clcpp/math_funcs/power_funcs.hpp +++ /dev/null @@ -1,153 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP - -#include -#include -#include - -#include "common.hpp" - -#define DEFINE_BINARY_POWER_FUNC_INT(NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, MIN1, MAX1, MIN2, MAX2) \ -struct power_func_ ## NAME : public binary_func \ -{ \ - power_func_ ## NAME(bool is_embedded) : m_is_embedded(is_embedded) \ - { \ - \ - } \ - \ - std::string str() \ - { \ - return #NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - /* Reference value type is cl_double */ \ - cl_double operator()(const cl_float& x, const cl_int& y) \ - { \ - return (HOST_FUNC)(static_cast(x), y); \ - } \ - \ - cl_float min1() \ - { \ - return MIN1; \ - } \ - \ - cl_float max1() \ - { \ - return MAX1; \ - } \ - \ - cl_int min2() \ - { \ - return MIN2; \ - } \ - \ - cl_int max2() \ - { \ - return MAX2; \ - } \ - \ - std::vector in1_special_cases() \ - { \ - return { \ - cl_float(-1.0f), \ - cl_float(0.0f), \ - cl_float(-0.0f), \ - }; \ - } \ - \ - std::vector in2_special_cases() \ - { \ - return { \ - 2, 3, -1, 1, -2, 2 \ - }; \ - } \ - \ - bool use_ulp() \ - { \ - return USE_ULP; \ - } \ - \ - float ulp() \ - { \ - if(m_is_embedded) \ - { \ - return ULP_EMBEDDED; \ - } \ - return ULP; \ - } \ -private: \ - bool m_is_embedded; \ -}; - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(power, cbrt, std::cbrt, true, 2.0f, 4.0f, 0.001f, -1000.0f, -9.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(power, rsqrt, reference::rsqrt, true, 2.0f, 4.0f, 0.001f, 1.0f, 100.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(power, sqrt, std::sqrt, true, 3.0f, 4.0f, 0.001f, 1.0f, 100.0f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC(power, pow, std::pow, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f) 
-MATH_FUNCS_DEFINE_BINARY_FUNC(power, powr, reference::powr, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f) - -// func_name, reference_func, use_ulp, ulp, ulp_for_embedded, min1, max1, min2, max2 -DEFINE_BINARY_POWER_FUNC_INT(pown, std::pow, true, 16.0f, 16.0f, 1.0f, 100.0f, 1, 10) -DEFINE_BINARY_POWER_FUNC_INT(rootn, reference::rootn, true, 16.0f, 16.0f, -100.0f, 100.0f, -10, 10) - -// power functions -AUTO_TEST_CASE(test_power_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // gentype cbrt(gentype x); - // gentype rsqrt(gentype x); - // gentype sqrt(gentype x); - TEST_UNARY_FUNC_MACRO((power_func_cbrt(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((power_func_sqrt(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((power_func_rsqrt(is_embedded_profile))) - - // gentype pow(gentype x, gentype y); - // gentype powr(gentype x, gentype y); - TEST_BINARY_FUNC_MACRO((power_func_pow(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((power_func_powr(is_embedded_profile))) - - // gentype pown(gentype x, intn y); - // gentype rootn(gentype x, intn y); - TEST_BINARY_FUNC_MACRO((power_func_pown(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((power_func_rootn(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/reference.hpp b/test_conformance/clcpp/math_funcs/reference.hpp deleted file mode 100644 index 0f5fc2fc21..0000000000 --- a/test_conformance/clcpp/math_funcs/reference.hpp +++ /dev/null @@ -1,315 
+0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP - -#include -#include -#include - -#include "../common.hpp" - -namespace reference -{ - // Reference functions for OpenCL comparison functions that - // are not already defined in STL. - cl_float maxmag(const cl_float& x, const cl_float& y) - { - if((std::abs)(x) > (std::abs)(y)) - { - return x; - } - else if((std::abs)(y) > (std::abs)(x)) - { - return y; - } - return (std::fmax)(x, y); - } - - cl_float minmag(const cl_float& x, const cl_float& y) - { - if((std::abs)(x) < (std::abs)(y)) - { - return x; - } - else if((std::abs)(y) < (std::abs)(x)) - { - return y; - } - return (std::fmin)(x, y); - } - - // Reference functions for OpenCL exp functions that - // are not already defined in STL. - cl_double exp10(const cl_double& x) - { - // 10^x = exp2( x * log2(10) ) - auto log2_10 = (std::log2)(static_cast(10.0)); - cl_double x_log2_10 = static_cast(x * log2_10); - return (std::exp2)(x_log2_10); - } - - // Reference functions for OpenCL floating point functions that - // are not already defined in STL. 
- cl_double2 fract(cl_double x) - { - // Copied from math_brute_force/reference_math.c - cl_double2 r; - if((std::isnan)(x)) - { - r.s[0] = std::numeric_limits::quiet_NaN(); - r.s[1] = std::numeric_limits::quiet_NaN(); - return r; - } - - r.s[0] = (std::modf)(x, &(r.s[1])); - if(r.s[0] < 0.0 ) - { - r.s[0] = 1.0f + r.s[0]; - r.s[1] -= 1.0f; - if( r.s[0] == 1.0f ) - r.s[0] = HEX_FLT(+, 1, fffffe, -, 1); - } - return r; - } - - cl_double2 remquo(cl_double x, cl_double y) - { - cl_double2 r; - // remquo return the same value that is returned by the - // remainder function - r.s[0] = (std::remainder)(x,y); - // calulcate quo - cl_double x_y = (x - r.s[0]) / y; - cl_uint quo = (std::abs)(x_y); - r.s[1] = quo & 0x0000007fU; - if(x_y < 0.0) - r.s[1] = -r.s[1]; - - // fix edge cases - if(!(std::isnan)(x) && y == 0.0) - { - r.s[1] = 0; - } - else if((std::isnan)(x) && (std::isnan)(y)) - { - r.s[1] = 0; - } - return r; - } - - // Reference functions for OpenCL half_math:: functions that - // are not already defined in STL. - cl_double divide(cl_double x, cl_double y) - { - return x / y; - } - - cl_double recip(cl_double x) - { - return 1.0 / x; - } - - // Reference functions for OpenCL other functions that - // are not already defined in STL. - cl_double mad(cl_double x, cl_double y, cl_double z) - { - return (x * y) + z; - } - - // Reference functions for OpenCL power functions that - // are not already defined in STL. - cl_double rsqrt(const cl_double& x) - { - return cl_double(1.0) / ((std::sqrt)(x)); - } - - cl_double powr(const cl_double& x, const cl_double& y) - { - //powr(x, y) returns NaN for x < 0. - if( x < 0.0 ) - return std::numeric_limits::quiet_NaN(); - - //powr ( x, NaN ) returns the NaN for x >= 0. - //powr ( NaN, y ) returns the NaN. - if((std::isnan)(x) || (std::isnan)(y) ) - return std::numeric_limits::quiet_NaN(); - - if( x == 1.0 ) - { - //powr ( +1, +-inf ) returns NaN. 
- if((std::abs)(y) == INFINITY ) - return std::numeric_limits::quiet_NaN(); - - //powr ( +1, y ) is 1 for finite y. (NaN handled above) - return 1.0; - } - - if( y == 0.0 ) - { - //powr ( +inf, +-0 ) returns NaN. - //powr ( +-0, +-0 ) returns NaN. - if( x == 0.0 || x == std::numeric_limits::infinity()) - return std::numeric_limits::quiet_NaN(); - - //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) - return 1.0; - } - - if( x == 0.0 ) - { - //powr ( +-0, -inf) is +inf. - //powr ( +-0, y ) is +inf for finite y < 0. - if( y < 0.0 ) - return std::numeric_limits::infinity(); - - //powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) - return 0.0; - } - - // x = +inf - if( (std::isinf)(x) ) - { - if( y < 0 ) - return 0; - return std::numeric_limits::infinity(); - } - - double fabsx = (std::abs)(x); - double fabsy = (std::abs)(y); - - //y = +-inf cases - if( (std::isinf)(fabsy) ) - { - if( y < 0.0 ) - { - if( fabsx < 1.0 ) - return std::numeric_limits::infinity(); - return 0; - } - if( fabsx < 1.0 ) - return 0.0; - return std::numeric_limits::infinity(); - } - return (std::pow)(x, y); - } - - cl_double rootn(const cl_double& x, const cl_int n) - { - //rootn (x, 0) returns a NaN. - if(n == 0) - return std::numeric_limits::quiet_NaN(); - - //rootn ( x, n ) returns a NaN for x < 0 and n is even. - if(x < 0 && 0 == (n & 1)) - return std::numeric_limits::quiet_NaN(); - - if(x == 0.0) - { - if(n > 0) - { - //rootn ( +-0, n ) is +0 for even n > 0. - if(0 == (n & 1)) - { - return cl_double(0.0); - } - //rootn ( +-0, n ) is +-0 for odd n > 0. - else - { - return x; - } - } - else - { - //rootn ( +-0, n ) is +inf for even n < 0. - if(0 == ((-n) & 1)) - { - return std::numeric_limits::infinity(); - } - //rootn ( +-0, n ) is +-inf for odd n < 0. 
- else - { - return (std::copysign)( - std::numeric_limits::infinity(), x - ); - } - } - } - - cl_double r = (std::abs)(x); - r = (std::exp2)((std::log2)(r) / static_cast(n)); - return (std::copysign)(r, x); - } - - // Reference functions for OpenCL trigonometric functions that - // are not already defined in STL. - cl_double acospi(cl_double x) - { - return (std::acos)(x) / CL_M_PI; - } - - cl_double asinpi(cl_double x) - { - return (std::asin)(x) / CL_M_PI; - } - - cl_double atanpi(cl_double x) - { - return (std::atan)(x) / CL_M_PI; - } - - cl_double cospi(cl_double x) - { - return (std::cos)(x * CL_M_PI); - } - - cl_double sinpi(cl_double x) - { - return (std::sin)(x * CL_M_PI); - } - - cl_double tanpi(cl_double x) - { - return (std::tan)(x * CL_M_PI); - } - - cl_double atan2(cl_double x, cl_double y) - { - #if defined(WIN32) || defined(_WIN32) - // Fix edge cases for Windows - if ((std::isinf)(x) && (std::isinf)(y)) { - cl_double retval = (y > 0) ? CL_M_PI_4 : 3.f * CL_M_PI_4; - return (x > 0) ? retval : -retval; - } - #endif // defined(WIN32) || defined(_WIN32) - return (std::atan2)(x, y); - } - - cl_double atan2pi(cl_double x, cl_double y) - { - return ::reference::atan2(x, y) / CL_M_PI; - } - - cl_double2 sincos(cl_double x) - { - cl_double2 r; - r.s[0] = (std::sin)(x); - r.s[1] = (std::cos)(x); - return r; - } -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP diff --git a/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp deleted file mode 100644 index 343024a891..0000000000 --- a/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp +++ /dev/null @@ -1,222 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP - -#include -#include - -#include "common.hpp" - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acos, std::acos, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acosh, std::acosh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acospi, reference::acospi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asin, std::asin, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinh, std::asinh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinpi, reference::asinpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atan, std::atan, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanh, std::atanh, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanpi, reference::atanpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) - -// For (sin/cos/tan)pi functions min input value is -0.24 and max input value is 0.24, -// so (CL_M_PI * x) is never greater than CL_M_PI_F. 
-// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cos, std::cos, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cosh, std::cosh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cospi, reference::cospi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sin, std::sin, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinh, std::sinh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinpi, reference::sinpi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tan, std::tan, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanh, std::tanh, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) -MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanpi, reference::tanpi, true, 6.0f, 6.0f, 0.001f, -0.24, -0.24f) - -// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 -MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2, reference::atan2, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) -MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2pi, reference::atan2pi, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) - -// gentype sincos(gentype x, gentype * cosval); -// -// Fact that second argument is a pointer is inconvenient. -// -// We don't want to modify all helper functions defined in funcs_test_utils.hpp -// that run test kernels generated based on this class and check if results are -// correct, so instead of having two output cl_float buffers, one for sines and -// one for cosines values, we use one cl_float2 output buffer (first component is -// sine, second is cosine). 
-// -// Below we also define specialization of generate_kernel_unary function template -// for trigonometric_func_sincos. -struct trigonometric_func_sincos : public unary_func -{ - trigonometric_func_sincos(bool is_embedded) : m_is_embedded(is_embedded) - { - - } - - std::string str() - { - return "sincos"; - } - - std::string headers() - { - return "#include \n"; - } - - /* Reference value type is cl_double */ - cl_double2 operator()(const cl_float& x) - { - return (reference::sincos)(static_cast(x)); - } - - cl_float min1() - { - return -CL_M_PI_F; - } - - cl_float max1() - { - return CL_M_PI_F; - } - - bool use_ulp() - { - return true; - } - - float ulp() - { - if(m_is_embedded) - { - return 4.0f; - } - return 4.0f; - } -private: - bool m_is_embedded; -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template <> -std::string generate_kernel_unary(trigonometric_func_sincos func) -{ - return - "__kernel void test_sincos(global float *input, global float2 *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 sine_cosine_of_x;\n" - " float cosine_of_x = 0;\n" - " sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n" - " sine_cosine_of_x.y = cosine_of_x;\n" - " output[gid] = sine_cosine_of_x;\n" - "}\n"; -} -#else -template <> -std::string generate_kernel_unary(trigonometric_func_sincos func) -{ - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sincos(global_ptr input, global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " float2 sine_cosine_of_x;\n" - " float cosine_of_x = 0;\n" - " sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n" - " sine_cosine_of_x.y = 
cosine_of_x;\n" - " output[gid] = sine_cosine_of_x;\n" - "}\n"; -} -#endif - -// trigonometric functions -AUTO_TEST_CASE(test_trigonometric_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - // Check for EMBEDDED_PROFILE - bool is_embedded_profile = false; - char profile[128]; - last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - is_embedded_profile = true; - - // gentype acos(gentype x); - // gentype acosh(gentype x); - // gentype acospi(gentype x); - // gentype asin(gentype x); - // gentype asinh(gentype x); - // gentype asinpi(gentype x); - // gentype atan(gentype x); - // gentype atanh(gentype x); - // gentype atanpi(gentype x); - TEST_UNARY_FUNC_MACRO((trigonometric_func_acos(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_acosh(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_acospi(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_asin(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_asinh(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_asinpi(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_atan(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_atanh(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_atanpi(is_embedded_profile))) - - // gentype cos(gentype x); - // gentype cosh(gentype x); - // gentype cospi(gentype x); - // gentype sin(gentype x); - // gentype sinh(gentype x); - // gentype sinpi(gentype x); - // gentype tan(gentype x); - // gentype tanh(gentype x); - // gentype tanpi(gentype x); - TEST_UNARY_FUNC_MACRO((trigonometric_func_cos(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_cosh(is_embedded_profile))) - 
TEST_UNARY_FUNC_MACRO((trigonometric_func_cospi(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_sin(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_sinh(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_sinpi(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_tan(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_tanh(is_embedded_profile))) - TEST_UNARY_FUNC_MACRO((trigonometric_func_tanpi(is_embedded_profile))) - - // gentype atan2(gentype y, gentype x); - // gentype atan2pi(gentype y, gentype x); - TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2(is_embedded_profile))) - TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2pi(is_embedded_profile))) - - // gentype sincos(gentype x, gentype * cosval); - TEST_UNARY_FUNC_MACRO((trigonometric_func_sincos(is_embedded_profile))) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP diff --git a/test_conformance/clcpp/pipes/CMakeLists.txt b/test_conformance/clcpp/pipes/CMakeLists.txt deleted file mode 100644 index 65daae9750..0000000000 --- a/test_conformance/clcpp/pipes/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_PIPES) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/pipes/main.cpp b/test_conformance/clcpp/pipes/main.cpp deleted file mode 100644 index 0ed4ef68ea..0000000000 --- a/test_conformance/clcpp/pipes/main.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_pipes.hpp" - - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/pipes/test_pipes.hpp b/test_conformance/clcpp/pipes/test_pipes.hpp deleted file mode 100644 index 3fc30dcd99..0000000000 --- a/test_conformance/clcpp/pipes/test_pipes.hpp +++ /dev/null @@ -1,632 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP -#define TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP - -#include -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_pipes { - -enum class pipe_source -{ - param, - storage -}; - -enum class pipe_operation -{ - work_item, - work_item_reservation, - work_group_reservation, - sub_group_reservation -}; - -struct test_options -{ - pipe_operation operation; - pipe_source source; - int max_packets; - int num_packets; -}; - -struct output_type -{ - cl_uint write_reservation_is_valid; - cl_uint write_success; - - cl_uint num_packets; - cl_uint max_packets; - cl_uint read_reservation_is_valid; - cl_uint read_success; - - cl_uint value; -}; - -const std::string source_common = R"( -struct output_type -{ - uint write_reservation_is_valid; - uint write_success; - - uint num_packets; - uint max_packets; - uint read_reservation_is_valid; - uint read_success; - - uint value; -}; -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << source_common; - if (options.operation == pipe_operation::work_item) - { - s << R"( - kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - output[gid].write_reservation_is_valid = 1; - - uint value = gid; - output[gid].write_success = write_pipe(out_pipe, &value) == 0; - } - - kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - output[gid].num_packets = get_pipe_num_packets(in_pipe); - output[gid].max_packets = 
get_pipe_max_packets(in_pipe); - - output[gid].read_reservation_is_valid = 1; - - uint value; - output[gid].read_success = read_pipe(in_pipe, &value) == 0; - output[gid].value = value; - } - )"; - } - else if (options.operation == pipe_operation::work_item_reservation) - { - s << R"( - kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - if (gid % 2 == 1) return; - - reserve_id_t reservation = reserve_write_pipe(out_pipe, 2); - output[gid + 0].write_reservation_is_valid = is_valid_reserve_id(reservation); - output[gid + 1].write_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value0 = gid + 0; - uint value1 = gid + 1; - output[gid + 0].write_success = write_pipe(out_pipe, reservation, 0, &value0) == 0; - output[gid + 1].write_success = write_pipe(out_pipe, reservation, 1, &value1) == 0; - commit_write_pipe(out_pipe, reservation); - } - - kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - if (gid % 2 == 1) return; - - output[gid + 0].num_packets = get_pipe_num_packets(in_pipe); - output[gid + 0].max_packets = get_pipe_max_packets(in_pipe); - output[gid + 1].num_packets = get_pipe_num_packets(in_pipe); - output[gid + 1].max_packets = get_pipe_max_packets(in_pipe); - - reserve_id_t reservation = reserve_read_pipe(in_pipe, 2); - output[gid + 0].read_reservation_is_valid = is_valid_reserve_id(reservation); - output[gid + 1].read_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value0; - uint value1; - output[gid + 0].read_success = read_pipe(in_pipe, reservation, 1, &value0) == 0; - output[gid + 1].read_success = read_pipe(in_pipe, reservation, 0, &value1) == 0; - commit_read_pipe(in_pipe, reservation); - output[gid + 0].value = value0; - output[gid + 1].value = value1; - } - )"; - } - else if (options.operation == pipe_operation::work_group_reservation) - { - s << R"( - kernel void 
producer(write_only pipe uint out_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - reserve_id_t reservation = work_group_reserve_write_pipe(out_pipe, get_local_size(0)); - output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value = gid; - output[gid].write_success = write_pipe(out_pipe, reservation, get_local_id(0), &value) == 0; - work_group_commit_write_pipe(out_pipe, reservation); - } - - kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - output[gid].num_packets = get_pipe_num_packets(in_pipe); - output[gid].max_packets = get_pipe_max_packets(in_pipe); - - reserve_id_t reservation = work_group_reserve_read_pipe(in_pipe, get_local_size(0)); - output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value; - output[gid].read_success = read_pipe(in_pipe, reservation, get_local_size(0) - 1 - get_local_id(0), &value) == 0; - work_group_commit_read_pipe(in_pipe, reservation); - output[gid].value = value; - } - )"; - } - else if (options.operation == pipe_operation::sub_group_reservation) - { - s << R"( - #pragma OPENCL EXTENSION cl_khr_subgroups : enable - - kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - reserve_id_t reservation = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size()); - output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value = gid; - output[gid].write_success = write_pipe(out_pipe, reservation, get_sub_group_local_id(), &value) == 0; - sub_group_commit_write_pipe(out_pipe, reservation); - } - - kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) - { - const ulong gid = get_global_id(0); - - output[gid].num_packets = get_pipe_num_packets(in_pipe); - output[gid].max_packets = get_pipe_max_packets(in_pipe); - - reserve_id_t 
reservation = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size()); - output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation); - - uint value; - output[gid].read_success = read_pipe(in_pipe, reservation, get_sub_group_size() - 1 - get_sub_group_local_id(), &value) == 0; - sub_group_commit_read_pipe(in_pipe, reservation); - output[gid].value = value; - } - )"; - } - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #include - #include - #include - #include - #include - using namespace cl; - )"; - - s << source_common; - - std::string init_out_pipe; - std::string init_in_pipe; - if (options.source == pipe_source::param) - { - init_out_pipe = "auto out_pipe = pipe_param;"; - init_in_pipe = "auto in_pipe = pipe_param;"; - } - else if (options.source == pipe_source::storage) - { - s << "pipe_storage storage;"; - init_out_pipe = "auto out_pipe = storage.get();"; - init_in_pipe = "auto in_pipe = make_pipe(storage);"; - } - - if (options.operation == pipe_operation::work_item) - { - s << R"( - kernel void producer(pipe pipe_param, global_ptr output) - { - )" << init_out_pipe << R"( - const ulong gid = get_global_id(0); - - output[gid].write_reservation_is_valid = 1; - - uint value = gid; - output[gid].write_success = out_pipe.write(value); - } - - kernel void consumer(pipe pipe_param, global_ptr output) - { - )" << init_in_pipe << R"( - const ulong gid = get_global_id(0); - - output[gid].num_packets = in_pipe.num_packets(); - output[gid].max_packets = in_pipe.max_packets(); - - output[gid].read_reservation_is_valid = 1; - - uint value; - output[gid].read_success = in_pipe.read(value); - output[gid].value = value; - } - )"; - } - else if (options.operation == pipe_operation::work_item_reservation) - { - s << R"( - kernel void producer(pipe pipe_param, global_ptr output) - { - )" << init_out_pipe << R"( - const ulong gid = get_global_id(0); - if (gid % 2 == 1) return; - - auto 
reservation = out_pipe.reserve(2); - output[gid + 0].write_reservation_is_valid = reservation.is_valid(); - output[gid + 1].write_reservation_is_valid = reservation.is_valid(); - - uint value0 = gid + 0; - uint value1 = gid + 1; - output[gid + 0].write_success = reservation.write(0, value0); - output[gid + 1].write_success = reservation.write(1, value1); - reservation.commit(); - } - - kernel void consumer(pipe pipe_param, global_ptr output) - { - )" << init_in_pipe << R"( - const ulong gid = get_global_id(0); - if (gid % 2 == 1) return; - - output[gid + 0].num_packets = in_pipe.num_packets(); - output[gid + 0].max_packets = in_pipe.max_packets(); - output[gid + 1].num_packets = in_pipe.num_packets(); - output[gid + 1].max_packets = in_pipe.max_packets(); - - auto reservation = in_pipe.reserve(2); - output[gid + 0].read_reservation_is_valid = reservation.is_valid(); - output[gid + 1].read_reservation_is_valid = reservation.is_valid(); - - uint value0; - uint value1; - output[gid + 0].read_success = reservation.read(1, value0); - output[gid + 1].read_success = reservation.read(0, value1); - reservation.commit(); - output[gid + 0].value = value0; - output[gid + 1].value = value1; - } - )"; - } - else if (options.operation == pipe_operation::work_group_reservation) - { - s << R"( - kernel void producer(pipe pipe_param, global_ptr output) - { - )" << init_out_pipe << R"( - const ulong gid = get_global_id(0); - - auto reservation = out_pipe.work_group_reserve(get_local_size(0)); - output[gid].write_reservation_is_valid = reservation.is_valid(); - - uint value = gid; - output[gid].write_success = reservation.write(get_local_id(0), value); - reservation.commit(); - } - - kernel void consumer(pipe pipe_param, global_ptr output) - { - )" << init_in_pipe << R"( - const ulong gid = get_global_id(0); - - output[gid].num_packets = in_pipe.num_packets(); - output[gid].max_packets = in_pipe.max_packets(); - - auto reservation = in_pipe.work_group_reserve(get_local_size(0)); - 
output[gid].read_reservation_is_valid = reservation.is_valid(); - - uint value; - output[gid].read_success = reservation.read(get_local_size(0) - 1 - get_local_id(0), value); - reservation.commit(); - output[gid].value = value; - } - )"; - } - else if (options.operation == pipe_operation::sub_group_reservation) - { - s << R"( - kernel void producer(pipe pipe_param, global_ptr output) - { - )" << init_out_pipe << R"( - const ulong gid = get_global_id(0); - - auto reservation = out_pipe.sub_group_reserve(get_sub_group_size()); - output[gid].write_reservation_is_valid = reservation.is_valid(); - - uint value = gid; - output[gid].write_success = reservation.write(get_sub_group_local_id(), value); - reservation.commit(); - } - - kernel void consumer(pipe pipe_param, global_ptr output) - { - )" << init_in_pipe << R"( - const ulong gid = get_global_id(0); - - output[gid].num_packets = in_pipe.num_packets(); - output[gid].max_packets = in_pipe.max_packets(); - - auto reservation = in_pipe.sub_group_reserve(get_sub_group_size()); - output[gid].read_reservation_is_valid = reservation.is_valid(); - - uint value; - output[gid].read_success = reservation.read(get_sub_group_size() - 1 - get_sub_group_local_id(), value); - reservation.commit(); - output[gid].value = value; - } - )"; - } - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - if (options.num_packets % 2 != 0 || options.max_packets < options.num_packets) - { - RETURN_ON_ERROR_MSG(-1, "Invalid test options") - } - -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - if (options.operation == pipe_operation::sub_group_reservation && !is_extension_available(device, "cl_khr_subgroups")) - { - log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. 
Skipping tests.\n"); - return CL_SUCCESS; - } -#endif - - cl_program program; - cl_kernel producer_kernel; - cl_kernel consumer_kernel; - - std::string producer_kernel_name = "producer"; - std::string consumer_kernel_name = "consumer"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &producer_kernel, - source, producer_kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &producer_kernel, - source, producer_kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) - consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error); - RETURN_ON_CL_ERROR(error, "clCreateKernel") -// Normal run -#else - error = create_opencl_kernel( - context, &program, &producer_kernel, - source, producer_kernel_name - ); - RETURN_ON_ERROR(error) - consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error); - RETURN_ON_CL_ERROR(error, "clCreateKernel") -#endif - - size_t max_work_group_size; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - const size_t count = options.num_packets; - const size_t local_size = (std::min)((size_t)256, max_work_group_size); - const size_t global_size = count; - - const cl_uint packet_size = sizeof(cl_uint); - - cl_mem pipe = clCreatePipe(context, 0, packet_size, options.max_packets, NULL, &error); 
- RETURN_ON_CL_ERROR(error, "clCreatePipe") - - cl_mem output_buffer; - output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - const char pattern = 0; - error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") - - error = clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &pipe); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(producer_kernel, 1, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - error = clEnqueueNDRangeKernel(queue, producer_kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &pipe); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(consumer_kernel, 1, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - error = clEnqueueNDRangeKernel(queue, consumer_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(output_type) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - std::vector existing_values(count, false); - for (size_t gid = 0; gid < count; gid++) - { - const output_type &o = output[gid]; - - if (!o.write_reservation_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "write reservation is not valid") - } - if (!o.write_success) - { - RETURN_ON_ERROR_MSG(-1, "write did not succeed") - } - - if (o.num_packets == 0 || o.num_packets > options.num_packets) - { - RETURN_ON_ERROR_MSG(-1, "num_packets did not return correct value") - } - if (o.max_packets != options.max_packets) - { - 
RETURN_ON_ERROR_MSG(-1, "max_packets did not return correct value") - } - if (!o.read_reservation_is_valid) - { - RETURN_ON_ERROR_MSG(-1, "read reservation is not valid") - } - if (!o.read_success) - { - RETURN_ON_ERROR_MSG(-1, "read did not succeed") - } - - // Every value must be presented once in any order - if (o.value >= count || existing_values[o.value]) - { - RETURN_ON_ERROR_MSG(-1, "kernel did not return correct value") - } - existing_values[o.value] = true; - } - - clReleaseMemObject(pipe); - clReleaseMemObject(output_buffer); - clReleaseKernel(producer_kernel); - clReleaseKernel(consumer_kernel); - clReleaseProgram(program); - return error; -} - -const pipe_operation pipe_operations[] = { - pipe_operation::work_item, - pipe_operation::work_item_reservation, - pipe_operation::work_group_reservation, - pipe_operation::sub_group_reservation -}; - -const std::tuple max_and_num_packets[] = { - std::make_tuple(2, 2), - std::make_tuple(10, 8), - std::make_tuple(256, 254), - std::make_tuple(1 << 16, 1 << 16), - std::make_tuple((1 << 16) + 5, 1 << 16), - std::make_tuple(12345, 12344), - std::make_tuple(1 << 18, 1 << 18) -}; - -AUTO_TEST_CASE(test_pipes_pipe) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - std::vector> ps; - for (auto p : max_and_num_packets) - { - if (std::get<0>(p) < num_elements) - ps.push_back(p); - } - ps.push_back(std::tuple(num_elements, num_elements)); - - int error = CL_SUCCESS; - - for (auto operation : pipe_operations) - for (auto p : ps) - { - test_options options; - options.source = pipe_source::param; - options.max_packets = std::get<0>(p); - options.num_packets = std::get<1>(p); - options.operation = operation; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -AUTO_TEST_CASE(test_pipes_pipe_storage) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - std::vector> ps; - for (auto p : 
max_and_num_packets) - { - if (std::get<0>(p) < num_elements) - ps.push_back(p); - } - ps.push_back(std::tuple(num_elements, num_elements)); - - int error = CL_SUCCESS; - - for (auto operation : pipe_operations) - for (auto p : ps) - { - test_options options; - options.source = pipe_source::storage; - options.max_packets = std::get<0>(p); - options.num_packets = std::get<1>(p); - options.operation = operation; - - error = test(device, context, queue, options); - RETURN_ON_ERROR(error) - } - - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt deleted file mode 100644 index fd36d3006e..0000000000 --- a/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_PROGRAM_SCOPE_CTORS_DTORS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp deleted file mode 100644 index 9eb17f9212..0000000000 --- a/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp +++ /dev/null @@ -1,283 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP - -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#define RUN_PS_CTORS_DTORS_TEST_MACRO(TEST_CLASS) \ - last_error = run_ps_ctor_dtor_test( \ - device, context, queue, count, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -// Base class for all tests for kernels with program scope object with -// non-trivial ctors and/or dtors -struct ps_ctors_dtors_test_base : public detail::base_func_type -{ - // ctor is true, if and only if OpenCL program of this test contains program - // scope variable with non-trivial ctor. - // dtor is true, if and only if OpenCL program of this test contains program - // scope variable with non-trivial dtor. - ps_ctors_dtors_test_base(const bool ctor, - const bool dtor) - : m_ctor(ctor), m_dtor(dtor) - { - - } - virtual ~ps_ctors_dtors_test_base() { }; - // Returns test name - virtual std::string str() = 0; - // Returns OpenCL program source - virtual std::string generate_program() = 0; - // Returns kernel names IN ORDER - virtual std::vector get_kernel_names() - { - // Typical case, that is, only one kernel - return { this->get_kernel_name() }; - } - // Returns value that is expected to be in output_buffer[i] - virtual cl_uint operator()(size_t i) = 0; - // Executes kernels - // Typical case: execute every kernel once, every kernel has only - // one argument, that is, output buffer - virtual cl_int execute(const std::vector& kernels, - cl_mem& output_buffer, - cl_command_queue& queue, - size_t work_size) - { - cl_int err; - for(auto& k : kernels) - { - err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel( - queue, k, 1, - NULL, &work_size, NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - } - return err; - } - // This method check if queries for 
CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT - // and CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT using clGetProgramInfo() - // return correct values - virtual cl_int ctors_dtors_present_queries(cl_program program) - { - cl_int error = CL_SUCCESS; - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return error; - #else - // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool - // This indicates that the program object contains non-trivial constructor(s) that will be - // executed by runtime before any kernel from the program is executed. - - // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool - // This indicates that the program object contains non-trivial destructor(s) that will be - // executed by runtime when program is destroyed. - - // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT - cl_bool ctors_present; - size_t cl_bool_size; - error = clGetProgramInfo( - program, - CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, - sizeof(cl_bool), - static_cast(&ctors_present), - &cl_bool_size - ); - RETURN_ON_CL_ERROR(error, "clGetProgramInfo") - if(cl_bool_size != sizeof(cl_bool)) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n", - cl_bool_size, - sizeof(cl_bool) - ); - } - - // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT - cl_bool dtors_present = 0; - error = clGetProgramInfo( - program, - CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, - sizeof(cl_bool), - static_cast(&ctors_present), - &cl_bool_size - ); - RETURN_ON_CL_ERROR(error, "clGetProgramInfo") - if(cl_bool_size != sizeof(cl_bool)) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n", - cl_bool_size, - sizeof(cl_bool) - ); - } - - // check constructors - if(m_ctor && ctors_present != CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.\n" - ); - } - else if(!m_ctor && ctors_present == CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test 
failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.\n" - ); - } - - // check destructors - if(m_dtor && dtors_present != CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.\n" - ); - } - else if(!m_dtor && dtors_present == CL_TRUE) - { - error = -1; - CHECK_ERROR_MSG( - error, - "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.\n" - ); - } - return error; - #endif - } - -private: - bool m_ctor; - bool m_dtor; -}; - -template -int run_ps_ctor_dtor_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ps_ctor_dtor_test op) -{ - cl_mem buffers[1]; - cl_program program; - std::vector kernels; - size_t work_size[1]; - cl_int err; - - std::string code_str = op.generate_program(); - std::vector kernel_names = op.get_kernel_names(); - if(kernel_names.empty()) - { - RETURN_ON_ERROR_MSG(-1, "No kernel to run"); - } - kernels.resize(kernel_names.size()); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#else - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, 
kernel_names[0]); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#endif - - work_size[0] = count; - // host output vector - std::vector output = generate_output(work_size[0], 9999); - - // device output buffer - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - // Execute test - err = op.execute(kernels, buffers[0], queue, work_size[0]); - RETURN_ON_ERROR(err) - - // Check if queries returns correct values - err = op.ctors_dtors_present_queries(program); - RETURN_ON_ERROR(err); - - // Release kernels and program - // Destructors should be called now - for(auto& k : kernels) - { - err = clReleaseKernel(k); - RETURN_ON_CL_ERROR(err, "clReleaseKernel"); - } - err = clReleaseProgram(program); - RETURN_ON_CL_ERROR(err, "clReleaseProgram"); - - // Finish - err = clFinish(queue); - RETURN_ON_CL_ERROR(err, "clFinish"); - - err = clEnqueueReadBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - // Check output values - for(size_t i = 0; i < output.size(); i++) - { - cl_uint v = op(i); - if(!(are_equal(v, output[i], detail::make_value(0), op))) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s(%s) failed. 
Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), - format_value(v).c_str(), format_value(output[i]).c_str() - ); - } - } - log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp deleted file mode 100644 index 78b077394b..0000000000 --- a/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_ctors_dtors.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp deleted file mode 100644 index c9ac0821ce..0000000000 --- a/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp +++ /dev/null @@ -1,324 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP -#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP - -#include "common.hpp" - -// Test for program scope variable with non-trivial ctor -struct ps_ctor_test : public ps_ctors_dtors_test_base -{ - ps_ctor_test(const cl_uint test_value) - : ps_ctors_dtors_test_base(true, false), - m_test_value(test_value) - { - - } - - std::string str() - { - return "ps_ctor_test"; - } - - std::vector get_kernel_names() - { - return { - this->str() + "_set", - this->str() + "_read" - }; - } - - // Returns value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i) - { - if(i % 2 == 0) - return m_test_value; - return cl_uint(0xbeefbeef); - } - - // In 1st kernel 0th work-tem sets member m_x of program scope variable global_var to - // m_test_value and m_y to uint(0xbeefbeef), - // In 2nd kernel: - // 1) if global id is even, then work-item reads global_var.m_x and writes it to output[its-global-id]; - // 2) otherwise, work-item reads global_var.m_y and writes it to output[its-global-id]. 
- std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_names()[0] + "(global uint *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = 0xbeefbeef;\n" - "}\n" - "__kernel void " + this->get_kernel_names()[1] + "(global uint *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " if(gid % 2 == 0)\n" - " output[gid] = " + std::to_string(m_test_value) + ";\n" - "}\n"; - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct template - "template\n" - "struct ctor_test_class_base {\n" - // non-trivial ctor - " ctor_test_class_base(T x) { m_x = x;};\n" - " T m_x;\n" - "};\n" - // struct template - "template\n" - "struct ctor_test_class : public ctor_test_class_base {\n" - // non-trivial ctor - " ctor_test_class(T x, T y) : ctor_test_class_base(x), m_y(y) { };\n" - " T m_y;\n" - "};\n" - // global scope program variables - "ctor_test_class global_var(uint(0), uint(0));\n" - - "__kernel void " + this->get_kernel_names()[0] + "(global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " if(gid == 0) {\n" - " global_var.m_x = " + std::to_string(m_test_value) + ";\n" - " global_var.m_y = 0xbeefbeef;\n" - " }\n" - "}\n" - - "__kernel void " + this->get_kernel_names()[1] + "(global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " if(gid % 2 == 0)\n" - " output[gid] = global_var.m_x;\n" - " else\n" - " output[gid] = global_var.m_y;\n" - "}\n"; - #endif - } - -private: - cl_uint m_test_value; -}; - -// Test for program scope variable with non-trivial dtor -struct ps_dtor_test : public ps_ctors_dtors_test_base -{ - 
ps_dtor_test(const cl_uint test_value) - : ps_ctors_dtors_test_base(false, true), - m_test_value(test_value) - { - - } - - std::string str() - { - return "ps_dtor_test"; - } - - // Returns value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i) - { - if(i % 2 == 0) - return m_test_value; - return 1; - } - - // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope - // variable global_var, it also sets counter to 1; - // After global_var is destroyed all even elements of output buffer should equal m_test_value, - // and all odd should equal 1. - // If odd elements of output buffer are >1 it means dtor was executed more than once. - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " if(gid % 2 == 0)\n" - " output[gid] = " + std::to_string(m_test_value) + ";\n" - " else\n" - " output[gid] = 1;\n" - "}\n"; - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct template - "template\n" - "struct dtor_test_class_base {\n" - // non-trivial dtor - // set all odd elements in buffer to counter - " ~dtor_test_class_base() {\n" - " for(size_t i = 1; i < this->size; i+=2)\n" - " {\n" - " this->buffer[i] = counter;\n" - " }\n" - " counter++;\n" - " };\n" - " global_ptr buffer;\n" - " size_t size;\n" - " T counter;\n" - "};\n" - // struct - "struct dtor_test_class : public dtor_test_class_base {\n" - // non-trivial dtor - // set all values in buffer to m_test_value - " ~dtor_test_class() {\n" - " for(size_t i = 0; i < this->size; i+=2)\n" - " 
this->buffer[i] = " + std::to_string(m_test_value) + ";\n" - " };\n" - "};\n" - // global scope program variable - "dtor_test_class global_var;\n" - - // When global_var is being destroyed, first dtor ~dtor_test_class is called, - // and then ~dtor_test_class_base is called. - - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - // set buffer and size in global var - " if(gid == 0){\n" - " global_var.buffer = output;\n" - " global_var.size = get_global_size(0);\n" - " global_var.counter = 1;\n" - " }\n" - "}\n"; - #endif - } - -private: - cl_uint m_test_value; -}; - -// Test for program scope variable with both non-trivial ctor -// and non-trivial dtor -struct ps_ctor_dtor_test : public ps_ctors_dtors_test_base -{ - ps_ctor_dtor_test(const cl_uint test_value) - : ps_ctors_dtors_test_base(false, true), - m_test_value(test_value) - { - - } - - std::string str() - { - return "ps_ctor_dtor_test"; - } - - // Returns value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i) - { - return m_test_value; - } - - // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope - // variable global_var. - // After global_var is destroyed all even elements of output buffer should equal m_test_value, - // and all odd should equal 1. 
- std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + std::to_string(m_test_value) + ";\n" - "}\n"; - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - // struct template - "template\n" - "struct ctor_test_class {\n" - // non-trivial ctor - " ctor_test_class(T value) : m_value(value) { };\n" - " T m_value;\n" - "};\n\n" - // struct - "struct ctor_dtor_test_class {\n" - // non-trivial ctor - " ctor_dtor_test_class(uint value) : ctor_test(value) { } \n" - // non-trivial dtor - // set all values in buffer to m_test_value - " ~ctor_dtor_test_class() {\n" - " for(size_t i = 0; i < this->size; i++)\n" - " {\n" - " this->buffer[i] = ctor_test.m_value;\n" - " }\n" - " };\n" - " ctor_test_class ctor_test;\n" - " global_ptr buffer;\n" - " size_t size;\n" - "};\n" - // global scope program variable - "ctor_dtor_test_class global_var(" + std::to_string(m_test_value) + ");\n" - - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - // set buffer and size in global var - " if(gid == 0){\n" - " global_var.buffer = output;\n" - " global_var.size = get_global_size(0);\n" - " }\n" - "}\n"; - #endif - } - -private: - cl_uint m_test_value; -}; - -// This contains tests for program scope (global) constructors and destructors, more -// detailed tests are also in clcpp/api. 
-AUTO_TEST_CASE(test_program_scope_ctors_dtors) -(cl_device_id device, cl_context context, cl_command_queue queue, int count) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_test(0xdeadbeefU)) - RUN_PS_CTORS_DTORS_TEST_MACRO(ps_dtor_test(0xbeefdeadU)) - RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_dtor_test(0xdeaddeadU)) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP diff --git a/test_conformance/clcpp/reinterpret/CMakeLists.txt b/test_conformance/clcpp/reinterpret/CMakeLists.txt deleted file mode 100644 index ed02c56fba..0000000000 --- a/test_conformance/clcpp/reinterpret/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_REINTERPRET) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/reinterpret/as_type.hpp b/test_conformance/clcpp/reinterpret/as_type.hpp deleted file mode 100644 index da088cfab6..0000000000 --- a/test_conformance/clcpp/reinterpret/as_type.hpp +++ /dev/null @@ -1,223 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP -#define TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include - - -template -struct as_type : public unary_func -{ - static_assert(sizeof(IN1) == sizeof(OUT1), "It is an error to use the as_type operator to reinterpret data to a type of a different number of bytes"); - - std::string str() - { - return "as_type"; - } - - std::string headers() - { - return "#include \n"; - } - - OUT1 operator()(const IN1& x) - { - return *reinterpret_cast(&x); - } -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_as_type(func_type func) -{ - std::string in1_value = "input[gid]"; - std::string function_call = "as_" + type_name() + "(" + in1_value + ");"; - return - "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_as_type(func_type func) -{ - std::string headers = func.headers(); - std::string in1_value = "input[gid]"; - std::string function_call = "as_type<" + type_name() + ">(" + in1_value + ")"; - return - "" + func.defs() + - "" + headers + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#endif - -template -bool verify_as_type(const std::vector &in, const std::vector &out, as_type_op op) -{ - // 
When the operand and result type contain a different number of elements, the result is implementation-defined, - // i.e. any result is correct - if (vector_size::value == vector_size::value) - { - for (size_t i = 0; i < in.size(); i++) - { - auto expected = op(in[i]); - if (std::memcmp(&expected, &out[i], sizeof(expected)) != 0) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - } - return true; -} - -template -int test_as_type_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, as_type_op op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int error; - - typedef typename as_type_op::in_type INPUT; - typedef typename as_type_op::out_type OUTPUT; - - // Don't run test for unsupported types - if (!(type_supported(device) && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_as_type(op); - std::string kernel_name("test_"); kernel_name += op.str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(error) -#else - error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(error) -#endif - - std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); - std::vector output = 
generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - work_size[0] = count; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - error = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - if (!verify_as_type(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); - } - log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_as_type) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#define TEST_AS_TYPE_MACRO(TYPE1, TYPE2) \ - last_error = test_as_type_func( \ - device, context, queue, n_elems, as_type() \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - - TEST_AS_TYPE_MACRO(cl_int, cl_int) - TEST_AS_TYPE_MACRO(cl_uint, cl_int) - TEST_AS_TYPE_MACRO(cl_int, 
cl_ushort2) - TEST_AS_TYPE_MACRO(cl_uchar, cl_uchar) - TEST_AS_TYPE_MACRO(cl_char4, cl_ushort2) - TEST_AS_TYPE_MACRO(cl_uchar16, cl_char16) - TEST_AS_TYPE_MACRO(cl_short8, cl_uchar16) - TEST_AS_TYPE_MACRO(cl_float4, cl_uint4) - TEST_AS_TYPE_MACRO(cl_float16, cl_int16) - TEST_AS_TYPE_MACRO(cl_long2, cl_float4) - TEST_AS_TYPE_MACRO(cl_ulong, cl_long) - TEST_AS_TYPE_MACRO(cl_ulong16, cl_double16) - TEST_AS_TYPE_MACRO(cl_uchar16, cl_double2) - TEST_AS_TYPE_MACRO(cl_ulong4, cl_short16) - -#undef TEST_AS_TYPE_MACRO - - if (error != CL_SUCCESS) - { - return -1; - } - return error; -} - - -#endif // TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP diff --git a/test_conformance/clcpp/relational_funcs/CMakeLists.txt b/test_conformance/clcpp/relational_funcs/CMakeLists.txt deleted file mode 100644 index 3a8389cba9..0000000000 --- a/test_conformance/clcpp/relational_funcs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_RELATIONAL_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/relational_funcs/common.hpp b/test_conformance/clcpp/relational_funcs/common.hpp deleted file mode 100644 index a13f7bacd6..0000000000 --- a/test_conformance/clcpp/relational_funcs/common.hpp +++ /dev/null @@ -1,112 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include -#include - -template -OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = func(in1.s[i], in2.s[i], in3.s[i]); - } - return result; -} - -template -OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result = func(in1, in2, in3); - return result; -} - - -template -OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = func(in1.s[i], in2.s[i]); - } - return result; -} - -template -OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result = func(in1, in2); - return result; -} - -template -OUT1 perform_function(const IN1& in1, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result; - for(size_t i = 0; i < vector_size::value; i++) - { - result.s[i] = func(in1.s[i]); - } - return result; -} - -template -OUT1 perform_function(const IN1& in1, F func, typename std::enable_if::value>::type* = 0) -{ - OUT1 result = func(in1); - return result; -} - -template -cl_int perform_all_function(const IN1& in1, typename std::enable_if::value>::type* = 0) -{ - cl_int result = 1; - for(size_t i = 0; i < vector_size::value; i++) - { - result = (in1.s[i] != 0) ? result : cl_int(0); - } - return result; -} - -cl_int perform_all_function(const cl_int& in1, typename std::enable_if::value>::type* = 0) -{ - return (in1 != 0) ? 
cl_int(1) : cl_int(0); -} - -template -cl_int perform_any_function(const IN1& in1, typename std::enable_if::value>::type* = 0) -{ - cl_int result = 0; - for(size_t i = 0; i < vector_size::value; i++) - { - result = (in1.s[i] != 0) ? cl_int(1) : result; - } - return result; -} - -cl_int perform_any_function(const cl_int& in1, typename std::enable_if::value>::type* = 0) -{ - return (in1 != 0) ? cl_int(1) : cl_int(0); -} - -#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP diff --git a/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp deleted file mode 100644 index 980d67c843..0000000000 --- a/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp +++ /dev/null @@ -1,150 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP - -#include "common.hpp" - -// This marco creates a class wrapper for comparision function we want to test. 
-#define DEF_COMPARISION_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ -template \ -struct CLASS_NAME : public binary_func< \ - typename make_vector_type::type, /* create cl_floatN type */ \ - typename make_vector_type::type, /* create cl_floatN type */ \ - typename make_vector_type::type /* create cl_intN type */ \ - > \ -{ \ - typedef typename make_vector_type::type input_type; \ - typedef typename make_vector_type::type result_type; \ - \ - std::string str() \ - { \ - return #FUNC_NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - \ - result_type operator()(const input_type& x, const input_type& y) \ - { \ - typedef typename scalar_type::type SCALAR; \ - return perform_function( \ - x, y, \ - [](const SCALAR& a, const SCALAR& b) \ - { \ - if(HOST_FUNC_EXPRESSION) \ - { \ - return cl_int(1); \ - } \ - return cl_int(0); \ - } \ - ); \ - } \ - \ - bool is_out_bool() \ - { \ - return true; \ - } \ - \ - input_type min1() \ - { \ - return detail::def_limit(-10000.0f); \ - } \ - \ - input_type max1() \ - { \ - return detail::def_limit(10000.0f); \ - } \ - \ - input_type min2() \ - { \ - return detail::def_limit(-10000.0f); \ - } \ - \ - input_type max2() \ - { \ - return detail::def_limit(10000.0f); \ - } \ - \ - std::vector in1_special_cases() \ - { \ - typedef typename scalar_type::type SCALAR; \ - return { \ - detail::make_value(std::numeric_limits::infinity()), \ - detail::make_value(-std::numeric_limits::infinity()), \ - detail::make_value(std::numeric_limits::quiet_NaN()), \ - detail::make_value(0.0f), \ - detail::make_value(-0.0f) \ - }; \ - } \ - \ - std::vector in2_special_cases() \ - { \ - typedef typename scalar_type::type SCALAR; \ - return { \ - detail::make_value(std::numeric_limits::infinity()), \ - detail::make_value(-std::numeric_limits::infinity()), \ - detail::make_value(std::numeric_limits::quiet_NaN()), \ - detail::make_value(0.0f), \ - detail::make_value(-0.0f) \ - }; \ - } \ -}; - 
-DEF_COMPARISION_FUNC(comparison_func_isequal, isequal, (a == b)) -DEF_COMPARISION_FUNC(comparison_func_isnotequal, isnotequal, !(a == b)) -DEF_COMPARISION_FUNC(comparison_func_isgreater, isgreater, (std::isgreater)(a, b)) -DEF_COMPARISION_FUNC(comparison_func_isgreaterequal, isgreaterequal, ((std::isgreater)(a, b) || a == b)) -DEF_COMPARISION_FUNC(comparison_func_isless, isless, (std::isless)(a, b)) -DEF_COMPARISION_FUNC(comparison_func_islessequal, islessequal, ((std::isless)(a, b) || a == b)) -DEF_COMPARISION_FUNC(comparison_func_islessgreater, islessgreater, ((a < b) || (a > b))) - -#undef DEF_COMPARISION_FUNC - -AUTO_TEST_CASE(test_relational_comparison_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -// Helper macro, so we don't have to repreat the same code. -#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>()) - - TEST_BINARY_REL_FUNC_MACRO(comparison_func_isequal) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_isnotequal) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreater) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreaterequal) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_isless) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessequal) - TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessgreater) - -#undef TEST_BINARY_REL_FUNC_MACRO - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP diff --git a/test_conformance/clcpp/relational_funcs/main.cpp b/test_conformance/clcpp/relational_funcs/main.cpp deleted file mode 100644 index 2b72d3d24f..0000000000 --- a/test_conformance/clcpp/relational_funcs/main.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright 
(c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "comparison_funcs.hpp" -#include "select_funcs.hpp" -#include "test_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/relational_funcs/select_funcs.hpp b/test_conformance/clcpp/relational_funcs/select_funcs.hpp deleted file mode 100644 index 2e6f6bdd58..0000000000 --- a/test_conformance/clcpp/relational_funcs/select_funcs.hpp +++ /dev/null @@ -1,158 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP - -#include "common.hpp" - -template -struct select_func_select : public ternary_func< - typename make_vector_type::type, /* create IN1N type */ - typename make_vector_type::type, /* create IN1N type */ - typename make_vector_type::type, /* create cl_intN type */ - typename make_vector_type::type /* create IN1N type */ - > -{ - typedef typename make_vector_type::type input1_type; - typedef typename make_vector_type::type input2_type; - typedef typename make_vector_type::type input3_type; - typedef typename make_vector_type::type result_type; - - std::string str() - { - return "select"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) - { - typedef typename scalar_type::type SCALAR1; - typedef typename scalar_type::type SCALAR2; - typedef typename scalar_type::type SCALAR3; - - return perform_function( - x, y, z, - [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c) - { - return (c != 0) ? 
b : a; - } - ); - } - - bool is_in3_bool() - { - return true; - } - - std::vector in3_special_cases() - { - return { - detail::make_value(0), - detail::make_value(1), - detail::make_value(12), - detail::make_value(-12) - }; - } -}; - -template -struct select_func_bitselect : public ternary_func< - typename make_vector_type::type, /* create IN1N type */ - typename make_vector_type::type, /* create IN1N type */ - typename make_vector_type::type, /* create cl_intN type */ - typename make_vector_type::type /* create IN1N type */ - > -{ - typedef typename make_vector_type::type input1_type; - typedef typename make_vector_type::type input2_type; - typedef typename make_vector_type::type input3_type; - typedef typename make_vector_type::type result_type; - - std::string str() - { - return "bitselect"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) - { - static_assert( - std::is_integral::value, - "bitselect test is implemented only for integers." - ); - static_assert( - std::is_unsigned::value, - "IN1 type should be unsigned, bitwise operations on signed int may cause problems." - ); - typedef typename scalar_type::type SCALAR1; - typedef typename scalar_type::type SCALAR2; - typedef typename scalar_type::type SCALAR3; - - return perform_function( - x, y, z, - [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c) - { - return (~c & a) | (c & b); - } - ); - } -}; - -AUTO_TEST_CASE(test_relational_select_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -// Tests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS -// mode, because this functions in OpenCL C requires different reference functions on host -// compared to their equivalent in OpenCL C++. 
-// (In OpenCL C the result of select(), when gentype is vector type, is based on the most -// significant bits of c components) -#ifndef USE_OPENCLC_KERNELS - // gentype select(gentype a, gentype b, booln c) - TEST_TERNARY_FUNC_MACRO((select_func_select())) - TEST_TERNARY_FUNC_MACRO((select_func_select())) - TEST_TERNARY_FUNC_MACRO((select_func_select())) - TEST_TERNARY_FUNC_MACRO((select_func_select())) - TEST_TERNARY_FUNC_MACRO((select_func_select())) -#else - log_info("WARNING:\n\tTests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS mode\n"); -#endif - - // gentype bitselect(gentype a, gentype b, gentype c) - TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) - TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) - TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) - TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) - TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP diff --git a/test_conformance/clcpp/relational_funcs/test_funcs.hpp b/test_conformance/clcpp/relational_funcs/test_funcs.hpp deleted file mode 100644 index 77e3d871c1..0000000000 --- a/test_conformance/clcpp/relational_funcs/test_funcs.hpp +++ /dev/null @@ -1,336 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP - -#include "common.hpp" - -// This marco creates a class wrapper for unary test function we want to test. -#define DEF_UNARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ -template \ -struct CLASS_NAME : public unary_func< \ - typename make_vector_type::type, /* create cl_floatN type */ \ - typename make_vector_type::type /* create cl_intN type */ \ - > \ -{ \ - typedef typename make_vector_type::type input_type; \ - typedef typename make_vector_type::type result_type; \ - \ - std::string str() \ - { \ - return #FUNC_NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - \ - result_type operator()(const input_type& x) \ - { \ - typedef typename scalar_type::type SCALAR; \ - return perform_function( \ - x, \ - [](const SCALAR& a) \ - { \ - if(HOST_FUNC_EXPRESSION) \ - { \ - return cl_int(1); \ - } \ - return cl_int(0); \ - } \ - ); \ - } \ - \ - bool is_out_bool() \ - { \ - return true; \ - } \ - \ - input_type min1() \ - { \ - return detail::def_limit(-10000.0f); \ - } \ - \ - input_type max1() \ - { \ - return detail::def_limit(10000.0f); \ - } \ - \ - std::vector in1_special_cases() \ - { \ - typedef typename scalar_type::type SCALAR; \ - return { \ - detail::make_value(std::numeric_limits::infinity()), \ - detail::make_value(-std::numeric_limits::infinity()), \ - detail::make_value(std::numeric_limits::quiet_NaN()), \ - detail::make_value(std::numeric_limits::signaling_NaN()), \ - detail::make_value(std::numeric_limits::denorm_min()), \ - detail::make_value(0.0f), \ - detail::make_value(-0.0f) \ - }; \ - } \ -}; - -// This marco creates a class wrapper for binary test function we want to test. 
-#define DEF_BINARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ -template \ -struct CLASS_NAME : public binary_func< \ - typename make_vector_type::type, /* create cl_floatN type */ \ - typename make_vector_type::type, /* create cl_floatN type */ \ - typename make_vector_type::type /* create cl_intN type */ \ - > \ -{ \ - typedef typename make_vector_type::type input_type; \ - typedef typename make_vector_type::type result_type; \ - \ - std::string str() \ - { \ - return #FUNC_NAME; \ - } \ - \ - std::string headers() \ - { \ - return "#include \n"; \ - } \ - \ - result_type operator()(const input_type& x, const input_type& y) \ - { \ - typedef typename scalar_type::type SCALAR; \ - return perform_function( \ - x, y, \ - [](const SCALAR& a, const SCALAR& b) \ - { \ - if(HOST_FUNC_EXPRESSION) \ - { \ - return cl_int(1); \ - } \ - return cl_int(0); \ - } \ - ); \ - } \ - \ - bool is_out_bool() \ - { \ - return true; \ - } \ - \ - input_type min1() \ - { \ - return detail::def_limit(-10000.0f); \ - } \ - \ - input_type max1() \ - { \ - return detail::def_limit(10000.0f); \ - } \ - \ - input_type min2() \ - { \ - return detail::def_limit(-10000.0f); \ - } \ - \ - input_type max2() \ - { \ - return detail::def_limit(10000.0f); \ - } \ - \ - std::vector in1_special_cases() \ - { \ - typedef typename scalar_type::type SCALAR; \ - return { \ - detail::make_value(std::numeric_limits::infinity()), \ - detail::make_value(-std::numeric_limits::infinity()), \ - detail::make_value(std::numeric_limits::quiet_NaN()), \ - detail::make_value(std::numeric_limits::signaling_NaN()), \ - detail::make_value(std::numeric_limits::denorm_min()), \ - detail::make_value(0.0f), \ - detail::make_value(-0.0f) \ - }; \ - } \ - \ - std::vector in2_special_cases() \ - { \ - typedef typename scalar_type::type SCALAR; \ - return { \ - detail::make_value(std::numeric_limits::infinity()), \ - detail::make_value(-std::numeric_limits::infinity()), \ - 
detail::make_value(std::numeric_limits::quiet_NaN()), \ - detail::make_value(std::numeric_limits::signaling_NaN()), \ - detail::make_value(std::numeric_limits::denorm_min()), \ - detail::make_value(0.0f), \ - detail::make_value(-0.0f) \ - }; \ - } \ -}; - -DEF_UNARY_TEST_FUNC(test_func_isfinite, isfinite, (std::isfinite)(a)) -DEF_UNARY_TEST_FUNC(test_func_isinf, isinf, (std::isinf)(a)) -DEF_UNARY_TEST_FUNC(test_func_isnan, isnan, (std::isnan)(a)) -DEF_UNARY_TEST_FUNC(test_func_isnormal, isnormal, (std::isnormal)(a)) -DEF_UNARY_TEST_FUNC(test_func_signbit, signbit , (std::signbit)(a)) - -DEF_BINARY_TEST_FUNC(test_func_isordered, isordered, !(std::isunordered)(a, b)) -DEF_BINARY_TEST_FUNC(test_func_isunordered, isunordered, (std::isunordered)(a, b)) - -#undef DEF_UNARY_TEST_FUNC -#undef DEF_BINARY_TEST_FUNC - -template -struct test_func_all : public unary_func< - typename make_vector_type::type, /* create cl_intN type */ - cl_int /* create cl_intN type */ - > -{ - typedef typename make_vector_type::type input_type; - typedef cl_int result_type; - - std::string str() - { - return "all"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input_type& x) - { - return perform_all_function(x); - } - - bool is_out_bool() - { - return true; - } - - bool is_in1_bool() - { - return true; - } - - std::vector in1_special_cases() - { - return { - detail::make_value(0), - detail::make_value(1), - detail::make_value(12), - detail::make_value(-12) - }; - } -}; - -template -struct test_func_any : public unary_func< - typename make_vector_type::type, /* create cl_intN type */ - cl_int /* create cl_intN type */ - > -{ - typedef typename make_vector_type::type input_type; - typedef cl_int result_type; - - std::string str() - { - return "any"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input_type& x) - { - return perform_any_function(x); - } - - bool is_out_bool() - { - return true; - } - - 
bool is_in1_bool() - { - return true; - } - - std::vector in1_special_cases() - { - return { - detail::make_value(0), - detail::make_value(1), - detail::make_value(12), - detail::make_value(-12) - }; - } -}; - -AUTO_TEST_CASE(test_relational_test_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -// Helper macro, so we don't have to repreat the same code. -#define TEST_UNARY_REL_FUNC_MACRO(CLASS_NAME) \ - TEST_UNARY_FUNC_MACRO(CLASS_NAME<1>()) \ - TEST_UNARY_FUNC_MACRO(CLASS_NAME<2>()) \ - TEST_UNARY_FUNC_MACRO(CLASS_NAME<4>()) \ - TEST_UNARY_FUNC_MACRO(CLASS_NAME<8>()) \ - TEST_UNARY_FUNC_MACRO(CLASS_NAME<16>()) - - TEST_UNARY_REL_FUNC_MACRO(test_func_isfinite) - TEST_UNARY_REL_FUNC_MACRO(test_func_isinf) - TEST_UNARY_REL_FUNC_MACRO(test_func_isnan) - TEST_UNARY_REL_FUNC_MACRO(test_func_isnormal) - TEST_UNARY_REL_FUNC_MACRO(test_func_signbit) - -// Tests for all(booln x) and any(booln x) are not run in USE_OPENCLC_KERNELS mode, -// because those functions in OpenCL C require different reference functions on host -// compared to their equivalents from OpenCL C++. 
-// (In OpenCL C those functions returns true/false based on the most significant bits -// in any/all component/s of x) -#ifndef USE_OPENCLC_KERNELS - TEST_UNARY_REL_FUNC_MACRO(test_func_all) - TEST_UNARY_REL_FUNC_MACRO(test_func_any) -#else - log_info("WARNING:\n\tTests for bool all(booln x) are not run in USE_OPENCLC_KERNELS mode\n"); - log_info("WARNING:\n\tTests for bool any(booln x) are not run in USE_OPENCLC_KERNELS mode\n"); -#endif - -#undef TEST_UNARY_REL_FUNC_MACRO - -#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \ - TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>()) - - TEST_BINARY_REL_FUNC_MACRO(test_func_isordered) - TEST_BINARY_REL_FUNC_MACRO(test_func_isunordered) - -#undef TEST_BINARY_REL_FUNC_MACRO - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP diff --git a/test_conformance/clcpp/spec_constants/CMakeLists.txt b/test_conformance/clcpp/spec_constants/CMakeLists.txt deleted file mode 100644 index 3488a5a783..0000000000 --- a/test_conformance/clcpp/spec_constants/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_SPEC_CONSTANTS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/spec_constants/common.hpp b/test_conformance/clcpp/spec_constants/common.hpp deleted file mode 100644 index 17b31aebae..0000000000 --- a/test_conformance/clcpp/spec_constants/common.hpp +++ /dev/null @@ -1,256 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP - -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#define RUN_SPEC_CONSTANTS_TEST_MACRO(TEST_CLASS) \ - last_error = run_spec_constants_test( \ - device, context, queue, n_elems, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -// Base class for all tests of cl::spec_contatnt -template -struct spec_constants_test : public detail::base_func_type -{ - // Output buffer type - typedef T type; - - virtual ~spec_constants_test() {}; - // Returns test name - virtual std::string str() = 0; - // Returns OpenCL program source - virtual std::string generate_program() = 0; - - // Return names of test's kernels, in order. - // Typical case: one kernel. 
- virtual std::vector get_kernel_names() - { - // Typical case, that is, only one kernel - return { this->get_kernel_name() }; - } - - // If local size has to be set in clEnqueueNDRangeKernel() - // this should return true; otherwise - false; - virtual bool set_local_size() - { - return false; - } - - // Calculates maximal work-group size (one dim) - virtual size_t get_max_local_size(const std::vector& kernels, - cl_device_id device, - size_t work_group_size, // default work-group size - cl_int& error) - { - size_t wg_size = work_group_size; - for(auto& k : kernels) - { - size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - wg_size = (std::min)(wg_size, max_wg_size); - } - return wg_size; - } - - // Sets spec constants - // Typical case: no spec constants to set - virtual cl_int set_spec_constants(const cl_program& program) - { - return CL_SUCCESS; - } - - // This covers typical case: - // 1. each kernel is executed once, - // 2. the only argument in every kernel is output_buffer - virtual cl_int execute(const std::vector& kernels, - cl_mem& output_buffer, - cl_command_queue& queue, - size_t work_size, - size_t work_group_size) - { - cl_int err; - for(auto& k : kernels) - { - err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel( - queue, k, 1, - NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - } - return err; - } - - // This is a function which performs additional queries and checks - // if the results are correct. This method is run after checking that - // test results (output values) are correct. 
- virtual cl_int check_queries(const std::vector& kernels, - cl_device_id device, - cl_context context, - cl_command_queue queue) - { - (void) kernels; - (void) device; - (void) context; - (void) queue; - return CL_SUCCESS; - } -}; - -template -int run_spec_constants_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, spec_constants_test op) -{ - cl_mem buffers[1]; - cl_program program; - std::vector kernels; - size_t wg_size; - size_t work_size[1]; - cl_int err; - - typedef typename spec_constants_test::type TYPE; - - // Don't run test for unsupported types - if(!(type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = op.generate_program(); - std::vector kernel_names = op.get_kernel_names(); - if(kernel_names.empty()) - { - RETURN_ON_ERROR_MSG(-1, "No kernel to run"); - } - kernels.resize(kernel_names.size()); - - std::string options = ""; - if(is_extension_available(device, "cl_khr_fp16")) - { - options += " -cl-fp16-enable"; - } - if(is_extension_available(device, "cl_khr_fp64")) - { - options += " -cl-fp64-enable"; - } -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], options); - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, 
"clCreateKernel"); - } -#else - const char * code_c_str = code_str.c_str(); - err = create_openclcpp_program(context, &program, 1, &(code_c_str), options.c_str()); - RETURN_ON_ERROR_MSG(err, "Creating OpenCL C++ program failed") - - // Set spec constants - err = op.set_spec_constants(program); - RETURN_ON_ERROR_MSG(err, "Setting Spec Constants failed") - - // Build program and create 1st kernel - err = build_program_create_kernel_helper( - context, &program, &(kernels[0]), 1, &(code_c_str), kernel_names[0].c_str() - ); - RETURN_ON_ERROR_MSG(err, "Unable to build program or to create kernel") - // Create other kernels - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#endif - - // Find the max possible wg size for among all the kernels - wg_size = op.get_max_local_size(kernels, device, 1024, err); - RETURN_ON_ERROR(err); - - work_size[0] = count; - if(op.set_local_size()) - { - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - work_size[0] = wg_number * wg_size; - } - - // host output vector - std::vector output = generate_output(work_size[0], 9999); - - // device output buffer - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - // Execute test - err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size); - RETURN_ON_ERROR(err) - - err = clEnqueueReadBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - // Check output values - for(size_t i = 0; i < output.size(); i++) - { - TYPE v = op(i, wg_size); - if(!(are_equal(v, output[i], detail::make_value(0), op))) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s(%s) failed. 
Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), - format_value(v).c_str(), format_value(output[i]).c_str() - ); - } - } - - // Check if queries returns correct values - err = op.check_queries(kernels, device, context, queue); - RETURN_ON_ERROR(err); - - log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - for(auto& k : kernels) - clReleaseKernel(k); - clReleaseProgram(program); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP diff --git a/test_conformance/clcpp/spec_constants/main.cpp b/test_conformance/clcpp/spec_constants/main.cpp deleted file mode 100644 index 305eb7dc8c..0000000000 --- a/test_conformance/clcpp/spec_constants/main.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "../common.hpp" - -#include "test_spec_consts_attributes.hpp" -#include "test_spec_consts_if.hpp" -#include "test_spec_consts_init_vars.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp deleted file mode 100644 index 539167fd7e..0000000000 --- a/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp +++ /dev/null @@ -1,281 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP -#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP - -#include - -#include "common.hpp" - -// In this test we check if specialization constant can be successfully used -// in kernel attribute cl::required_work_group_size(X, Y, Z). -struct spec_const_required_work_group_size_test : public spec_constants_test -{ - // See generate_program() to know what set_spec_constant is for. 
- spec_const_required_work_group_size_test(const bool set_spec_constant, - const cl_uint work_group_size_0) - : m_set_spec_constant(set_spec_constant), - m_work_group_size_0(work_group_size_0) - { - - } - - std::string str() - { - if(m_set_spec_constant) - return "spec_const_in_required_work_group_size_" + std::to_string(m_work_group_size_0); - else - return "spec_const_in_required_work_group_size_not_set"; - } - - bool set_local_size() - { - return true; - } - - size_t get_max_local_size(const std::vector& kernels, - cl_device_id device, - size_t work_group_size, // default work-group size - cl_int& error) - { - if(m_set_spec_constant) - { - return m_work_group_size_0; - } - return size_t(1); - } - - cl_uint operator()(size_t i, size_t work_group_size) - { - (void) work_group_size; - if(m_set_spec_constant) - { - return m_work_group_size_0; - } - return cl_uint(1); - } - - // Check if query for CL_KERNEL_COMPILE_WORK_GROUP_SIZE using clGetKernelWorkGroupInfo - // return correct values. It should return the work-group size specified by the - // cl::required_work_group_size(X, Y, Z) qualifier. 
- cl_int check_queries(const std::vector& kernels, - cl_device_id device, - cl_context context, - cl_command_queue queue) - { - (void) device; - (void) context; - size_t compile_wg_size[] = { 1, 1, 1 }; - cl_int error = clGetKernelWorkGroupInfo( - kernels[0], device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - sizeof(compile_wg_size), compile_wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - if(m_set_spec_constant) - { - if(compile_wg_size[0] != m_work_group_size_0 - || compile_wg_size[1] != 1 - || compile_wg_size[2] != 1) - { - error = -1; - } - } - else - { - if(compile_wg_size[0] != 1 - || compile_wg_size[1] != 1 - || compile_wg_size[2] != 1) - { - error = -1; - } - } - return error; - } - - // Sets spec constant - cl_int set_spec_constants(const cl_program& program) - { - cl_int error = CL_SUCCESS; - if(m_set_spec_constant) - { - error = clSetProgramSpecializationConstant( - program, cl_uint(1), sizeof(cl_uint), static_cast(&m_work_group_size_0) - ); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - return error; - } - - // Each work-item writes get_local_size(0) to output[work-item-global-id] - std::string generate_program(bool with_attribute) - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string att = " "; - if(with_attribute) - { - std::string work_group_size_0 = "1"; - if(m_set_spec_constant) - { - work_group_size_0 = std::to_string(m_work_group_size_0); - } - att = "\n__attribute__((reqd_work_group_size(" + work_group_size_0 + ",1,1)))\n"; - } - return - "__kernel" + att + "void " + this->get_kernel_name() + "(global uint *output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " output[gid] = get_local_size(0);\n" - "}\n"; - - 
#else - std::string att = ""; - if(with_attribute) - { - att = "[[cl::required_work_group_size(spec1, 1, 1)]]\n"; - } - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "spec_constant spec1{1};\n" - + att + - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " output[gid] = get_local_size(0);\n" - "}\n"; - #endif - } - - // Each work-item writes get_local_size(0) to output[work-item-global-id] - std::string generate_program() - { - return generate_program(true); - } - -private: - bool m_set_spec_constant; - cl_uint m_work_group_size_0; -}; - -// This function return max work-group size that can be used -// for kernels defined in source -size_t get_max_wg_size(const std::string& source, - const std::vector& kernel_names, - size_t work_group_size, // max wg size we want to have - cl_device_id device, - cl_context context, - cl_command_queue queue, - cl_int& err) -{ - cl_program program; - std::vector kernels; - if(kernel_names.empty()) - { - RETURN_ON_ERROR_MSG(-1, "No kernel to run"); - } - kernels.resize(kernel_names.size()); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0], "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#else - err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0]); - RETURN_ON_ERROR(err) - for(size_t i = 1; i < 
kernels.size(); i++) - { - kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); - RETURN_ON_CL_ERROR(err, "clCreateKernel"); - } -#endif - size_t wg_size = work_group_size; - for(auto& k : kernels) - { - size_t max_wg_size; - err = clGetKernelWorkGroupInfo( - k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - wg_size = (std::min)(wg_size, max_wg_size); - } - return wg_size; -} - -AUTO_TEST_CASE(test_spec_constants_in_kernel_attributes) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// If ONLY_SPIRV_COMPILATION is defined we can't check the max work-group size for the -// kernel because OpenCL kernel object is never created in that mode. -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - const size_t max_wg_size = 16; -#else - // Get max work-group size that can be used in [[cl::required_work_group_size(X, 1, 1)]] - // We do this by building kernel without this attribute and checking what is the max - // work-group size we can use with it. - auto test = spec_const_required_work_group_size_test(true, 1); - const size_t max_wg_size = get_max_wg_size( - test.generate_program(false), test.get_kernel_names(), - 1024, // max wg size we want to test - device, context, queue, - error - ); - RETURN_ON_ERROR_MSG(error, "Can't get max work-group size"); -#endif - - // Run tests when specialization constant spec1 is set (kernel - // attribute is [[cl::required_work_group_size(spec1, 1, 1)]]). 
- for(size_t i = 1; i <= max_wg_size; i *=2) - { - RUN_SPEC_CONSTANTS_TEST_MACRO( - spec_const_required_work_group_size_test( - true, i - ) - ); - } - // This test does not set spec constant - RUN_SPEC_CONSTANTS_TEST_MACRO( - spec_const_required_work_group_size_test( - false, 9999999 // This value is incorrect, but won't be set and kernel - // attribute should be [[cl::required_work_group_size(1, 1, 1)]] - ) - ); - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp deleted file mode 100644 index 1c7cec2abb..0000000000 --- a/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP -#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP - -#include - -#include "common.hpp" - -// This class tests using specialization constant in if statement -template -struct spec_const_in_if_test : public spec_constants_test -{ - // See generate_program() to know what set_spec_constant is for. 
- spec_const_in_if_test(const bool set_spec_constant) - : m_set_spec_constant(set_spec_constant) - { - static_assert( - is_vector_type::value == false, - "Specialization constant can be only scalar int or float type" - ); - switch (sizeof(T)) - { - case 1: - m_test_value = T(127); - break; - case 2: - m_test_value = T(0xdeadU); - break; - // 4 and 8 - default: - m_test_value = T(0xdeaddeadU); - break; - } - } - - std::string str() - { - return "spec_const_in_if_" + type_name(); - } - - cl_uint operator()(size_t i, size_t work_group_size) - { - (void) work_group_size; - if(m_set_spec_constant) - { - return m_test_value; - } - return static_cast(i); - } - - // Sets spec constant - cl_int set_spec_constants(const cl_program& program) - { - cl_int error = CL_SUCCESS; - if(m_set_spec_constant) - { - T spec1 = static_cast(m_test_value); - error = clSetProgramSpecializationConstant( - program, cl_uint(1), sizeof(T), static_cast(&spec1) - ); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - return error; - } - - // IF set_spec_constant == true: - // each work-item writes T(m_test_value) to output[work-item-global-id] - // Otherwise: - // each work-item writes T(get_global_id(0)) to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string result = "gid"; - if(m_set_spec_constant) - result = std::to_string(m_test_value); - return - "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " output[gid] = " + result + ";\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "typedef " + type_name() + " 
TYPE;\n" - "spec_constant spec1{TYPE(0)};\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " if(get(spec1) == TYPE(" + std::to_string(m_test_value) +"))\n" - " {\n" - " output[gid] = " + std::to_string(m_test_value) +";\n" - " }\n" - " else\n" - " {\n" - " output[gid] = gid;\n" - " }\n" - "}\n"; - #endif - } - -private: - bool m_set_spec_constant; - cl_uint m_test_value; -}; - -AUTO_TEST_CASE(test_spec_constants_in_if_statement) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - const std::vector set_spec_const_options { true, false }; - for(auto option : set_spec_const_options) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - if(is_extension_available(device, "cl_khr_fp16")) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - } - if(is_extension_available(device, "cl_khr_fp64")) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); - } - } - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp deleted file mode 100644 index 20bbff06bc..0000000000 --- a/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp +++ /dev/null @@ -1,174 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP -#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP - -#include - -#include "common.hpp" - -// This class tests initializing variables with a specialization constant value. -template -struct spec_const_init_var : public spec_constants_test -{ - // See generate_program() to know what set_spec_constant is for. - spec_const_init_var(const bool set_spec_constant) - : m_set_spec_constant(set_spec_constant) - { - static_assert( - is_vector_type::value == false, - "Specialization constant can be only scalar int or float type" - ); - switch (sizeof(T)) - { - case 1: - m_test_value = T(127); - break; - case 2: - m_test_value = T(0xdeadU); - break; - // 4 and 8 - default: - m_test_value = T(0xdeaddeadU); - break; - } - } - - std::string str() - { - return "spec_const_init_var_" + type_name(); - } - - cl_uint operator()(size_t i, size_t work_group_size) - { - (void) work_group_size; - if(m_set_spec_constant) - { - return m_test_value; - } - return static_cast(i); - } - - // Sets spec constant - cl_int set_spec_constants(const cl_program& program) - { - cl_int error = CL_SUCCESS; - if(m_set_spec_constant) - { - T spec = static_cast(m_test_value); - // spec1 - error = clSetProgramSpecializationConstant( - program, cl_uint(1), sizeof(T), static_cast(&spec) - ); - RETURN_ON_CL_ERROR(error, 
"clSetProgramSpecializationConstant") - - // spec2 - error = clSetProgramSpecializationConstant( - program, cl_uint(2), sizeof(T), static_cast(&spec) - ); - RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") - } - return error; - } - - // IF set_spec_constant == true: - // each work-item writes T(m_test_value) to output[work-item-global-id] - // Otherwise: - // each work-item writes T(get_global_id(0)) to output[work-item-global-id] - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - std::string result = "gid"; - if(m_set_spec_constant) - result = std::to_string(m_test_value); - return - "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " output[gid] = " + result + ";\n" - "}\n"; - - #else - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "typedef " + type_name() + " TYPE;\n" - "spec_constant spec1{TYPE(0)};\n" - "spec_constant spec2{TYPE(0)};\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" - "{\n" - " uint gid = get_global_id(0);\n" - " TYPE var1(spec1.get());\n" - " TYPE var2(spec2);\n" - " TYPE var3; var3 = spec2;\n" - " if((var1 == TYPE(" + std::to_string(m_test_value) +")) " - "&& (var2 == TYPE(" + std::to_string(m_test_value) +"))\n" - "&& (var3 == TYPE(" + std::to_string(m_test_value) +")))\n" - " {\n" - " output[gid] = " + std::to_string(m_test_value) +";\n" - " }\n" - " else\n" - " {\n" - " output[gid] = gid;\n" - " }\n" - "}\n"; - #endif - } - -private: - bool m_set_spec_constant; - cl_uint m_test_value; -}; - -AUTO_TEST_CASE(test_spec_constants_initializing_variables) -(cl_device_id device, cl_context 
context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - const std::vector set_spec_const_options { true, false }; - for(auto option : set_spec_const_options) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - if(is_extension_available(device, "cl_khr_fp16")) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - } - if(is_extension_available(device, "cl_khr_fp64")) - { - RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); - } - } - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP diff --git a/test_conformance/clcpp/spirv10_2016.04.27.7z b/test_conformance/clcpp/spirv10_2016.04.27.7z deleted file mode 100644 index 306be24308..0000000000 --- a/test_conformance/clcpp/spirv10_2016.04.27.7z +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe4f34d616ed7ef70e870c22078f60655f68b0c5191c8d8b9d045dd0e7390bc2 -size 5529152 diff --git a/test_conformance/clcpp/subgroups/CMakeLists.txt b/test_conformance/clcpp/subgroups/CMakeLists.txt deleted file mode 100644 index c8307d26d1..0000000000 --- a/test_conformance/clcpp/subgroups/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_SUBGROUPS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/subgroups/common.hpp b/test_conformance/clcpp/subgroups/common.hpp deleted file mode 100644 index 2b05a3cbc9..0000000000 --- a/test_conformance/clcpp/subgroups/common.hpp +++ 
/dev/null @@ -1,97 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP - -#include -#include -#include - -enum class work_group_op : int { - add, min, max -}; - -std::string to_string(work_group_op op) -{ - switch (op) - { - case work_group_op::add: - return "add"; - case work_group_op::min: - return "min"; - case work_group_op::max: - return "max"; - default: - break; - } - return ""; -} - -template -std::vector generate_input(size_t count, size_t wg_size) -{ - std::vector input(count, CL_INT_TYPE(1)); - switch (op) - { - case work_group_op::add: - return input; - case work_group_op::min: - { - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j--; - if(j == 0) - { - j = wg_size; - } - } - } - break; - case work_group_op::max: - { - size_t j = 0; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j++; - if(j == wg_size) - { - j = 0; - } - } - } - } - return input; -} - -template -std::vector generate_output(size_t count, size_t wg_size) -{ - switch (op) - { - case work_group_op::add: - return std::vector(count, CL_INT_TYPE(0)); - case work_group_op::min: - return std::vector(count, (std::numeric_limits::max)()); - case work_group_op::max: - return std::vector(count, (std::numeric_limits::min)()); - } - return std::vector(count, CL_INT_TYPE(0)); -} - 
-#endif // TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP diff --git a/test_conformance/clcpp/subgroups/main.cpp b/test_conformance/clcpp/subgroups/main.cpp deleted file mode 100644 index c81f2315ff..0000000000 --- a/test_conformance/clcpp/subgroups/main.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_sg_all.hpp" -#include "test_sg_any.hpp" -#include "test_sg_broadcast.hpp" -#include "test_sg_reduce.hpp" -#include "test_sg_scan_inclusive.hpp" -#include "test_sg_scan_exclusive.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/subgroups/test_sg_all.hpp b/test_conformance/clcpp/subgroups/test_sg_all.hpp deleted file mode 100644 index 5dc060cef2..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_all.hpp +++ /dev/null @@ -1,221 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -std::string generate_sg_all_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_all(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " bool result = sub_group_all(input[tid] < input[tid+1]);\n" - " if(!result) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} - -int verify_sg_all(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < count; i += wg_size) - { - for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) - { - // sub-group all - bool all = true; - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if(!(in[i+j+k] < in[i+j+k+1])) - { - all = false; - break; - } - } - - // Convert bool to uint - cl_uint all_uint = all ? 1 : 0; - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - if (all_uint != out[i + j + k]) - { - log_info( - "sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(all_uint), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_sg_all(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(i); - // In one place in ~half of work-groups (input[tid] < input[tid+1]) will - // generate false, it means that for sub_group_all(input[tid] < input[tid+1]) - // should return false for all sub-groups in that work-groups - if((j == wg_size/2) && (i > count/2)) - { - input[i] = input[i - 1]; - } - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_sg_all(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_sg_all_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input_sg_all(flat_work_size + 1, wg_size); - std::vector output = generate_output_sg_all(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - 
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "sub_group_all failed"); - } - log_info("sub_group_all passed\n"); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_all) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int err = CL_SUCCESS; - err = sub_group_all(device, context, queue, n_elems); - CHECK_ERROR(err) - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_any.hpp b/test_conformance/clcpp/subgroups/test_sg_any.hpp deleted file mode 100644 index 4c6adce91c..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_any.hpp +++ /dev/null @@ -1,221 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -std::string generate_sg_any_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_any(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " bool result = sub_group_any(input[tid] == input[tid+1]);\n" - " if(!result) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} - -int verify_sg_any(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < count; i += wg_size) - { - for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) - { - // sub-group any - bool any = false; - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if(in[i+j+k] == in[i+j+k+1]) - { - any = true; - break; - } - } - - // Convert bool to uint - cl_uint any_uint = any ? 1 : 0; - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - if (any_uint != out[i + j + k]) - { - log_info( - "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(any_uint), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_sg_any(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(i); - // In one place in ~half of work-groups (input[tid] == input[tid+1]) will - // generate true, it means that for sub_group_all(input[tid] == input[tid+1]) - // should return false for one sub-group in that work-groups - if((j == wg_size/2) && (i > count/2)) - { - input[i] = input[i - 1]; - } - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_sg_any(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_sg_any_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input_sg_any(flat_work_size + 1, wg_size); - std::vector output = generate_output_sg_any(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - 
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "sub_group_any failed"); - } - log_info("sub_group_any passed\n"); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_any) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int err = CL_SUCCESS; - err = sub_group_any(device, context, queue, n_elems); - CHECK_ERROR(err) - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp deleted file mode 100644 index 22317be592..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp +++ /dev/null @@ -1,206 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -std::string generate_sg_broadcast_kernel_code() -{ - return - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_broadcast(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " uint result = sub_group_broadcast(input[tid], 0);\n" - " output[tid] = result;\n" - "}\n"; -} - -int -verify_sg_broadcast(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < count; i += wg_size) - { - for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) - { - // sub-group broadcast - cl_uint broadcast_result = in[i+j]; - - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - if (broadcast_result != out[i + j + k]) - { - log_info( - "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(broadcast_result), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_sg_broadcast(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_sg_broadcast(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[] = { 1 }; - int err; - - // Get kernel source code - std::string code_str = generate_sg_broadcast_kernel_code(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); - RETURN_ON_ERROR(err) -#endif - - // Get max flat workgroup size - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size = count; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input_sg_broadcast(flat_work_size, wg_size); - std::vector output = generate_output_sg_broadcast(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, 
NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - int result = verify_sg_broadcast( input, output, work_size[0], wg_size, sg_max_size); - RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed") - log_info("sub_group_broadcast passed\n"); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_broadcast) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int err = CL_SUCCESS; - err = sub_group_broadcast(device, context, queue, n_elems); - CHECK_ERROR(err) - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_reduce.hpp b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp deleted file mode 100644 index 91acd474f9..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_reduce.hpp +++ /dev/null @@ -1,348 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -template -std::string generate_sg_reduce_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_reduce(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = sub_group_reduce(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} - -template -int verify_sg_reduce_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE sum = 0; - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - sum += in[i + j + k]; - } - - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if (sum != out[i + j + k]) - { - log_info( - "sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_reduce_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - min = std::min(min, in[i + j + k]); - } - - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if (min != out[i + j + k]) - { - log_info( - "sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_reduce_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - max = std::max(max, in[i + j + k]); - } - - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - if (max != out[i + j + k]) - { - log_info( - "sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_reduce(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_sg_reduce_add(in, out, wg_size, sg_size); - case work_group_op::min: - return verify_sg_reduce_min(in, out, wg_size, sg_size); - case work_group_op::max: - return verify_sg_reduce_max(in, out, wg_size, sg_size); - } - return -1; -} - -template -int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_sg_reduce_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - 
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_sg_reduce(input, output, wg_size, sg_max_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_reduce_add) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_reduce_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, 
context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_reduce_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp deleted file mode 100644 index 72081750ec..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp +++ /dev/null @@ -1,328 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP - -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -template -std::string generate_sg_scan_exclusive_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_scan_exclusive(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = sub_group_scan_exclusive(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} - -template -int verify_sg_scan_exclusive_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE sum = 0; - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if (sum != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j + k])); - return -1; - } - sum += in[i + j + k]; - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_exclusive_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - if (min != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j + k])); - return -1; - } - min = std::min(min, in[i + j + k]); - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_exclusive_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - if (max != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j + k])); - return -1; - } - max = std::max(max, in[i + j + k]); - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_exclusive(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size); - case work_group_op::min: - return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size); - case work_group_op::max: - return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size); - } - return -1; -} - -template -int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_sg_scan_exclusive_kernel_code(); -// 
----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! 
(Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_sg_scan_exclusive(input, output, wg_size, sg_max_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_scan_exclusive_add) -(cl_device_id device, cl_context 
context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_scan_exclusive_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_scan_exclusive_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_exclusive(device, context, queue, 
n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp deleted file mode 100644 index 0218668cbe..0000000000 --- a/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp +++ /dev/null @@ -1,335 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP -#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP - -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of sub-group functions -#include "common.hpp" - -template -std::string generate_sg_scan_inclusive_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_sg_scan_inclusive(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = sub_group_scan_inclusive(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} - -template -int verify_sg_scan_inclusive_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE sum = 0; - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - sum += in[i + j + k]; - if (sum != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_inclusive_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) - { - min = std::min(min, in[i + j + k]); - if (min != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_inclusive_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - size_t i, j, k; - for (i = 0; i < in.size(); i += wg_size) - { - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - // Check if all work-items in sub-group stored correct value - for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) - { - max = std::max(max, in[i + j + k]); - if (max != out[i + j + k]) - { - log_info( - "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j + k])); - return -1; - } - } - } - } - return CL_SUCCESS; -} - -template -int verify_sg_scan_inclusive(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size); - case work_group_op::min: - return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size); - case work_group_op::max: - return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size); - } - return -1; -} - -template -int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t sg_max_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_sg_scan_inclusive_kernel_code(); -// 
----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n"); - return CL_SUCCESS; -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - size_t param_value_size = 0; - err = clGetKernelSubGroupInfo( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t), static_cast(&wg_size), - sizeof(size_t), static_cast(&sg_max_size), - ¶m_value_size - ); - RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") - - // Verify size of returned param - if(param_value_size != sizeof(size_t)) - { - RETURN_ON_ERROR_MSG(-1, - "Returned size of max sub group size not valid! 
(Expected %lu, got %lu)\n", - sizeof(size_t), - param_value_size - ) - } - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_sg_scan_inclusive(input, output, wg_size, sg_max_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_sub_group_scan_inclusive_add) -(cl_device_id device, cl_context 
context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_scan_inclusive_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_sub_group_scan_inclusive_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - 
CHECK_ERROR(local_error) - error |= local_error; - - local_error = sub_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP diff --git a/test_conformance/clcpp/synchronization/CMakeLists.txt b/test_conformance/clcpp/synchronization/CMakeLists.txt deleted file mode 100644 index 70d3637cb8..0000000000 --- a/test_conformance/clcpp/synchronization/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_SYNCHRONIZATION) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/synchronization/main.cpp b/test_conformance/clcpp/synchronization/main.cpp deleted file mode 100644 index 04b5f36a54..0000000000 --- a/test_conformance/clcpp/synchronization/main.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "../common.hpp" - -#include "test_work_group_barrier.hpp" -#include "test_sub_group_barrier.hpp" -#include "named_barrier/test_spec_example.hpp" -#include "named_barrier/test_named_barrier.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/synchronization/named_barrier/common.hpp b/test_conformance/clcpp/synchronization/named_barrier/common.hpp deleted file mode 100644 index e6ce8b208c..0000000000 --- a/test_conformance/clcpp/synchronization/named_barrier/common.hpp +++ /dev/null @@ -1,172 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP - -#include - -// Common for all OpenCL C++ tests -#include "../../common.hpp" -#include "../../funcs_test_utils.hpp" - -#define RUN_WG_NAMED_BARRIER_TEST_MACRO(TEST_CLASS) \ - last_error = run_work_group_named_barrier_barrier_test( \ - device, context, queue, num_elements, TEST_CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - -namespace named_barrier { - -struct work_group_named_barrier_test_base : public detail::base_func_type -{ - // Returns test name - virtual std::string str() = 0; - // Returns OpenCL program source - // It's assumed that this program has only one kernel. - virtual std::string generate_program() = 0; - // Return value that is expected to be in output_buffer[i] - virtual cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) = 0; - // Kernel execution - // This covers typical case: kernel is executed once, kernel - // has only one argument which is output buffer - virtual cl_int execute(const cl_kernel kernel, - const cl_mem output_buffer, - const cl_command_queue& queue, - const size_t work_size, - const size_t work_group_size) - { - cl_int err; - err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - err = clEnqueueNDRangeKernel( - queue, kernel, 1, - NULL, &work_size, &work_group_size, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - return err; - } - // Calculates maximal work-group size (one dim) - virtual size_t get_max_local_size(const cl_kernel kernel, - const cl_device_id device, - const size_t work_group_size, // default work-group size - cl_int& error) - { - size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_ERROR(error) - return 
(std::min)(work_group_size, max_wg_size); - } - // if work-groups should be uniform - virtual bool enforce_uniform() - { - return false; - } -}; - -template -int run_work_group_named_barrier_barrier_test(cl_device_id device, cl_context context, cl_command_queue queue, - size_t count, work_group_named_barrier_test test) -{ - cl_mem buffers[1]; - cl_program program; - cl_kernel kernel; - size_t work_group_size; - size_t work_size[1]; - cl_int err; - - std::string code_str = test.generate_program(); - std::string kernel_name = test.get_kernel_name(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - // Find the max possible wg size for among all the kernels - work_group_size = test.get_max_local_size(kernel, device, 256, err); - RETURN_ON_ERROR(err); - if(work_group_size == 0) - { - log_info("SKIPPED: Can't produce local size with enough sub-groups. 
Skipping tests.\n"); - return CL_SUCCESS; - } - - work_size[0] = count; - // uniform work-group - if(test.enforce_uniform()) - { - size_t wg_number = static_cast( - std::ceil(static_cast(work_size[0]) / work_group_size) - ); - work_size[0] = wg_number * work_group_size; - } - - // host output vector - std::vector output = generate_output(work_size[0], 9999); - - // device output buffer - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - // Execute test kernels - err = test.execute(kernel, buffers[0], queue, work_size[0], work_group_size); - RETURN_ON_ERROR(err) - - err = clEnqueueReadBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer") - - // Check output values - for(size_t i = 0; i < output.size(); i++) - { - cl_uint v = test(i, work_group_size, i); - if(!(are_equal(v, output[i], ::detail::make_value(0), test))) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s(%s) failed. Expected: %s, got: %s", test.str().c_str(), type_name().c_str(), - format_value(v).c_str(), format_value(output[i]).c_str() - ); - } - } - log_info("test_%s(%s) passed\n", test.str().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -} // namespace named_barrier - -#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp deleted file mode 100644 index a0f57b24ff..0000000000 --- a/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp +++ /dev/null @@ -1,491 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP -#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP - -#include "common.hpp" - -namespace named_barrier { - -struct local_fence_named_barrier_test : public work_group_named_barrier_test_base -{ - std::string str() - { - return "local_fence"; - } - - // Return value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) - { - return static_cast(i); - } - - // At the end every work-item writes its global id to ouput[work-item-global-id]. 
- std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global uint *output, " - "local uint * lmem)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = gid;\n" - "}\n"; - - #else - return - "#define cl_khr_subgroup_named_barrier\n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output, " - "local_ptr lmem)\n" - "{\n\n" - " local a(1);\n" - " local b(2);\n" - " size_t gid = get_global_id(0);\n" - " size_t lid = get_local_id(0);\n" - " size_t value;\n" - " if(get_num_sub_groups() == 1)\n" - " {\n" - " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " a.wait(mem_fence::local);\n" - " value = lmem[lid];" // lmem[lid] shoule be equal to gid - " }\n" - " else if(get_num_sub_groups() == 2)\n" - " {\n" - " size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " b.wait(mem_fence::local);\n" - " value = lmem[lid];" // lmem[lid] shoule be equal to gid - " }\n" - " else if(get_num_sub_groups() > 2)\n" - " {\n" - " if(get_sub_group_id() < 2)\n" - " {\n" - " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" - // local and global id of some work-item outside of work-item subgroup, - // but within subgroups 0 and 1. 
- " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " b.wait(mem_fence::local);\n" // subgroup 0 and 1 are sync (local) - " value = lmem[lid];" // lmem[lid] shoule be equal to gid - " }\n" - " else\n" - " {\n" - " value = gid;\n" - " }\n" - " }\n" - " output[gid] = value;\n" - "}\n"; - #endif - } - - size_t get_max_local_size(const cl_kernel kernel, - const cl_device_id device, - const size_t work_group_size, // default work-group size - cl_int& error) - { - // Set size of the local memory, we need to to this to correctly calculate - // max possible work-group size. - size_t wg_size; - for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) - { - error = clSetKernelArg(kernel, 1, wg_size * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - if(max_wg_size >= wg_size) break; - } - return wg_size; - } - - cl_int execute(const cl_kernel kernel, - const cl_mem output_buffer, - const cl_command_queue queue, - const size_t work_size, - const size_t work_group_size) - { - cl_int err; - // Get context from queue - cl_context context; - err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); - RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") - - err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernel, 1, work_group_size * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - err = clEnqueueNDRangeKernel( - queue, kernel, 1, - NULL, &work_size, &work_group_size, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - - err = clFinish(queue); - return err; - } -}; - -struct global_fence_named_barrier_test : public 
work_group_named_barrier_test_base -{ - std::string str() - { - return "global_fence"; - } - - // Return value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) - { - return static_cast(i % work_group_size); - } - - // At the end every work-item writes its local id to ouput[work-item-global-id]. - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global uint * output, " - "global uint * temp)\n" - "{\n" - "size_t gid = get_global_id(0);\n" - "output[gid] = get_local_id(0);\n" - "}\n"; - - #else - return - "#define cl_khr_subgroup_named_barrier\n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output, " - "global_ptr temp)\n" - "{\n\n" - " local a(1);\n" - " local b(2);\n" - " size_t gid = get_global_id(0);\n" - " size_t lid = get_local_id(0);\n" - " size_t value;\n" - " if(get_num_sub_groups() == 1)\n" - " {\n" - " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " temp[other_gid] = other_lid + 1;\n" - " a.wait(mem_fence::global);\n" - " size_t other_lid_same_subgroup = (lid + 2) % get_sub_group_size();\n" - " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" - " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" - " a.wait(mem_fence::global, memory_scope_sub_group);\n" - " value = temp[gid];" // temp[gid] shoule be equal to lid - " }\n" - " else if(get_num_sub_groups() == 2)\n" - " {\n" - " size_t other_lid = (lid + 
get_max_sub_group_size()) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " temp[other_gid] = other_lid + 1;\n" - " b.wait(mem_fence::global);\n" // both subgroups wait, both are sync - " size_t other_lid_same_subgroup = " - "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n" - " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" - " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" - " b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup - " value = temp[gid];" // temp[gid] shoule be equal to lid - " }\n" - " else if(get_num_sub_groups() > 2)\n" - " {\n" - " if(get_sub_group_id() < 2)\n" - " {\n" - " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" - // local and global id of some work-item outside of work-item subgroup, - // but within subgroups 0 and 1. - " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " temp[other_gid] = other_lid + 1;\n" - " b.wait(mem_fence::global);\n" // both subgroups wait, both are sync - // local and global id of some other work-item within work-item subgroup - " size_t other_lid_same_subgroup = " - "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n" - " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" - " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" - " b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup - " value = temp[gid];" // temp[gid] shoule be equal to lid - " }\n" - " else\n" - " {\n" - " value = lid;\n" - " }\n" - " }\n" - " output[gid] = value;\n" - "}\n"; - #endif - } - - size_t get_max_local_size(const cl_kernel kernel, - const cl_device_id device, - const size_t work_group_size, // default work-group size - cl_int& error) - { 
- size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - return (std::min)(max_wg_size, work_group_size); - } - - cl_int execute(const cl_kernel kernel, - const cl_mem output_buffer, - const cl_command_queue queue, - const size_t work_size, - const size_t work_group_size) - { - cl_int err; - // Get context from queue - cl_context context; - err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); - RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") - - // create temp buffer - auto temp_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * work_size, NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - err = clEnqueueNDRangeKernel( - queue, kernel, 1, - NULL, &work_size, &work_group_size, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - - err = clFinish(queue); - err |= clReleaseMemObject(temp_buffer); - - return err; - } -}; - -struct global_local_fence_named_barrier_test : public work_group_named_barrier_test_base -{ - std::string str() - { - return "global_local_fence"; - } - - // Return value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) - { - return static_cast(i % work_group_size); - } - - // At the end every work-item writes its local id to ouput[work-item-global-id]. 
- std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - "__kernel void " + this->get_kernel_name() + "(global uint * output, " - "global uint * temp," - "local uint * lmem)\n" - "{\n" - "size_t gid = get_global_id(0);\n" - "output[gid] = get_local_id(0);\n" - "}\n"; - - #else - return - "#define cl_khr_subgroup_named_barrier\n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + this->get_kernel_name() + "(global_ptr output, " - "global_ptr temp," - "local_ptr lmem)\n" - "{\n\n" - " local a(1);\n" - " local b(2);\n" - " size_t gid = get_global_id(0);\n" - " size_t lid = get_local_id(0);\n" - " size_t value = 0;\n" - " if(get_num_sub_groups() == 1)\n" - " {\n" - " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " temp[other_gid] = other_lid;\n" - " a.wait(mem_fence::local | mem_fence::global);\n" - " if(lmem[lid] == gid) value = temp[gid];\n" - " }\n" - " else if(get_num_sub_groups() == 2)\n" - " {\n" - " size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " temp[other_gid] = other_lid;\n" - " b.wait(mem_fence::local | mem_fence::global);\n" - " if(lmem[lid] == gid) value = temp[gid];\n" - " }\n" - " else if(get_num_sub_groups() > 2)\n" - " {\n" - " if(get_sub_group_id() < 2)\n" - " {\n" - " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" - // local and global id of some work-item outside of work-item subgroup, - // but within subgroups 0 and 1. 
- " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" - " size_t other_gid = (gid - lid) + other_lid;\n" - " lmem[other_lid] = other_gid;\n" - " temp[other_gid] = other_lid;\n" - " b.wait(mem_fence::local | mem_fence::global);\n" - " if(lmem[lid] == gid) value = temp[gid];\n" - " }\n" - " else\n" - " {\n" - " value = lid;\n" - " }\n" - " }\n" - " output[gid] = value;\n" - "}\n"; - #endif - } - - size_t get_max_local_size(const cl_kernel kernel, - const cl_device_id device, - const size_t work_group_size, // default work-group size - cl_int& error) - { - // Set size of the local memory, we need to to this to correctly calculate - // max possible work-group size. - size_t wg_size; - for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) - { - error = clSetKernelArg(kernel, 2, wg_size * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - if(max_wg_size >= wg_size) break; - } - return wg_size; - } - - cl_int execute(const cl_kernel kernel, - const cl_mem output_buffer, - const cl_command_queue queue, - const size_t work_size, - const size_t work_group_size) - { - cl_int err; - // Get context from queue - cl_context context; - err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); - RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") - - // create temp buffer - auto temp_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * work_size, NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); - err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - err = 
clEnqueueNDRangeKernel( - queue, kernel, 1, - NULL, &work_size, &work_group_size, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - - err = clFinish(queue); - err |= clReleaseMemObject(temp_buffer); - - return err; - } -}; - -// ------------------------------------------------------------------------------ -// -------------------------- RUN TESTS ----------------------------------------- -// ------------------------------------------------------------------------------ -AUTO_TEST_CASE(test_work_group_named_barrier) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))) - if(!is_extension_available(device, "cl_khr_subgroup_named_barrier")) - { - log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n"); - return CL_SUCCESS; - } - - // An implementation shall support at least 8 named barriers per work-group. The exact - // maximum number can be queried using clGetDeviceInfo with CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR - // from the OpenCL 2.2 Extension Specification. 
- cl_uint named_barrier_count; - error = clGetDeviceInfo(device, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, sizeof(cl_uint), &named_barrier_count, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - if(named_barrier_count < 8) - { - RETURN_ON_ERROR_MSG(-1, "Maximum number of named barriers must be at least 8."); - } -#endif - - RUN_WG_NAMED_BARRIER_TEST_MACRO(local_fence_named_barrier_test()) - RUN_WG_NAMED_BARRIER_TEST_MACRO(global_fence_named_barrier_test()) - RUN_WG_NAMED_BARRIER_TEST_MACRO(global_local_fence_named_barrier_test()) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp deleted file mode 100644 index 7afbd00f1f..0000000000 --- a/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp +++ /dev/null @@ -1,325 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP -#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP - -#include "common.hpp" - -namespace named_barrier { - -// ------------------------------------------------------------------------------ -// ----------------------- SPECIFICATION EXAMPLE TEST---------------------------- -// ------------------------------------------------------------------------------ -// This test is based on the example in OpenCL C++ 1.0 specification (OpenCL C++ -// Standard Library > Synchronization Functions > Named barriers > wait). -struct spec_example_work_group_named_barrier_test : public work_group_named_barrier_test_base -{ - std::string str() - { - return "spec_example"; - } - - // Return value that is expected to be in output_buffer[i] - cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) - { - return static_cast(i); - } - - // At the end every work-item writes its global id to ouput[work-item-global-id]. - std::string generate_program() - { - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - return - // In OpenCL C kernel we imitate subgroups by partitioning work-group (based on - // local ids of work-items), work_group_named_barrier.wait(..) calls are replaced - // with work_group_barriers. 
- "__kernel void " + this->get_kernel_name() + "(global uint *output, " - "global uint * temp, " - "local uint * lmem)\n" - "{\n" - "size_t gid = get_global_id(0);\n" - "size_t lid = get_local_id(0);\n" - - // We divide work-group into ranges: - // [0 - e_wg)[ew_g; q_wg)[q_wg; 3 * ew_g)[3 * ew_g; h_wg)[h_wg; get_local_size(0) - 1] - // to simulate 8 subgroups - "size_t h_wg = get_local_size(0) / 2;\n" // half of work-group - "size_t q_wg = get_local_size(0) / 4;\n" // quarter - "size_t e_wg = get_local_size(0) / 8;\n" // one-eighth - - "if(lid < h_wg) lmem[lid] = gid;\n" // [0; h_wg) - "else temp[gid] = gid;\n" // [h_wg; get_local_size(0) - 1) - "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" - - "size_t other_lid = (lid + q_wg) % h_wg;\n" - "size_t value = 0;\n" - "if(lmem[other_lid] == ((gid - lid) + other_lid)){\n" - " value = gid;\n" - "}\n" - "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" - - "if(lid < q_wg){\n" // [0; q_wg) - " if(lid < e_wg) lmem[lid + e_wg] = gid;\n" // [0; e_wg) - " else lmem[lid - e_wg] = gid;\n" // [e_wg; q_wg) - "}\n" - "else if(lid < h_wg) {\n" // [q_wg; h_wg) - " if(lid < (3 * e_wg)) lmem[lid + e_wg] = gid;\n" // [q_ww; q_wg + e_wg) - " else lmem[lid - e_wg] = gid;\n" // [q_wg + e_wg; h_wg) - "}\n" - "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" - - "if(lid < q_wg){\n" // [0; q_wg) - " output[gid + q_wg] = lmem[lid];\n" - "}\n" - "else if(lid < h_wg) {\n" // [q_wg; h_wg) - " output[gid - q_wg] = lmem[lid];\n" - "}\n" - "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" - - "if(lid < q_wg){\n" // [0; q_wg) - " if(lid < e_wg) temp[gid] = output[gid + (3 * e_wg)];\n" // [0; e_wg) - " else temp[gid] = output[gid + e_wg];\n" // [e_wg; q_wg) - "}\n" - "else if(lid < h_wg) {\n" // [q_wg; h_wg) - " if(lid < (3 * e_wg)) temp[gid] = output[gid - e_wg];\n" // [q_ww; q_wg + e_wg) - " else temp[gid] = output[gid - (3 * e_wg)];\n" // [q_wg + e_wg; h_wg) - "}\n" - "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" - - "output[gid] = temp[gid];\n" - "}\n"; - - #else 
- return - "#define cl_khr_subgroup_named_barrier\n" - "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - - "void b_function(work_group_named_barrier &b, size_t value, local_ptr lmem)\n" - "{\n\n" - "size_t lid = get_local_id(0);\n" - // Work-items from the 1st subgroup writes to local memory that will be - // later read byt the 0th subgroup, and the other way around - 0th subgroup - // writes what 1st subgroup will later read. - // b.wait(mem_fence::local) should provide sync between those two subgroups. - "if(get_sub_group_id() < 1) lmem[lid + get_max_sub_group_size()] = value;\n" - "else lmem[lid - get_max_sub_group_size()] = value;\n" - "b.wait(mem_fence::local);\n\n" // sync writes to lmem for 2 subgroups (ids: 0, 1) - "}\n" - - "__kernel void " + this->get_kernel_name() + "(global_ptr output, " - "global_ptr temp, " - "local_ptr lmem)\n" - "{\n\n" - "local a(4);\n" - "local b(2);\n" - "local c(2);\n" - - "size_t gid = get_global_id(0);\n" - "size_t lid = get_local_id(0);\n" - "if(get_sub_group_id() < 4)" - "{\n" - " lmem[lid] = gid;\n" - " a.wait(mem_fence::local);\n" // sync writes to lmem for 4 subgroups (ids: 0, 1, 2, 3) - // Now all four subgroups should see changes in lmem. - " size_t other_lid = (lid + (2 * get_max_sub_group_size())) % (4 * get_max_sub_group_size());\n" - " size_t value = 0;\n" - " if(lmem[other_lid] == ((gid - lid) + other_lid)){\n" - " value = gid;\n" - " }\n" - " a.wait(mem_fence::local);\n" // sync reads from lmem for 4 subgroups (ids: 0, 1, 2, 3) - - " if(get_sub_group_id() < 2)" // ids: 0, 1 - " {\n" - " b_function(b, value, lmem);\n" - " }\n" - " else" // ids: 2, 3 - " {\n" - // Work-items from the 2nd subgroup writes to local memory that will be - // later read byt the 3rd subgroup, and the other way around - 3rd subgroup - // writes what 2nd subgroup will later read. - // c.wait(mem_fence::local) should provide sync between those two subgroups. 
- " if(get_sub_group_id() < 3) lmem[lid + get_max_sub_group_size()] = value ;\n" - " else lmem[lid - get_max_sub_group_size()] = value;\n" - " c.wait(mem_fence::local);\n" // sync writes to lmem for 2 subgroups (3, 4) - " }\n" - - // Now (0, 1) are in sync (local mem), and (3, 4) are in sync (local mem). - // However, subgroups (0, 1) are not in sync with (3, 4). - " if(get_sub_group_id() < 4) {\n" // ids: 0, 1, 2, 3 - " if(get_sub_group_id() < 2) output[gid + (2 * get_max_sub_group_size())] = lmem[lid];\n" - " else output[gid - (2 * get_max_sub_group_size())] = lmem[lid];\n" - " a.wait(mem_fence::global);\n" // sync writes to global memory (output) - // for 4 subgroups (0, 1, 2, 3) - " }\n" - "}\n" - "else {\n" // subgroups with id > 4 - " temp[gid] = gid;\n" - "}\n" - - // Now (0, 1, 2, 3) are in sync (global mem) - "if(get_sub_group_id() < 2) {\n" - " if(get_sub_group_id() < 1) temp[gid] = output[gid + (3 * get_max_sub_group_size())];\n" - " else temp[gid] = output[gid + (get_max_sub_group_size())];\n" - "}\n" - "else if(get_sub_group_id() < 4) {\n" - " if(get_sub_group_id() < 3) temp[gid] = output[gid - (get_max_sub_group_size())];\n" - " else temp[gid] = output[gid - (3 * get_max_sub_group_size())];\n" - "}\n" - - // Synchronize the entire work-group (in terms of accesses to global memory) - "work_group_barrier(mem_fence::global);\n" - "output[gid] = temp[gid];\n\n" - "}\n"; - #endif - } - - size_t get_max_local_size(const cl_kernel kernel, - const cl_device_id device, - const size_t work_group_size, // default work-group size - cl_int& error) - { - // Set size of the local memory, we need to to this to correctly calculate - // max possible work-group size. 
- size_t wg_size; - for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) - { - error = clSetKernelArg(kernel, 2, ((wg_size / 2) + 1) * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - size_t max_wg_size; - error = clGetKernelWorkGroupInfo( - kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL - ); - RETURN_ON_ERROR(error) - if(max_wg_size >= wg_size) break; - } - - // ----------------------------------------------------------------------------------- - // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ - // ----------------------------------------------------------------------------------- - #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - // make sure wg_size is a multiple of 8 - if(wg_size % 8 > 0) wg_size -= (wg_size % 8); - return wg_size; - #else - // make sure that wg_size will produce at least min_num_sub_groups - // subgroups in each work-group - size_t local_size[3] = { 1, 1, 1 }; - size_t min_num_sub_groups = 8; - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, - sizeof(size_t), &min_num_sub_groups, - sizeof(size_t) * 3, &local_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1) - { - if(min_num_sub_groups == 1) - { - RETURN_ON_ERROR_MSG(-1, "Can't produce local size with one subgroup") - } - return 0; - } - local_size[0] = (std::min)(wg_size, local_size[0]); - - // double-check - size_t sub_group_count_for_ndrange; - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, - sizeof(size_t) * 3, local_size, - sizeof(size_t), &sub_group_count_for_ndrange, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - if (sub_group_count_for_ndrange < min_num_sub_groups) - { - RETURN_ON_ERROR_MSG(-1, - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected >=%lu, got %lu)", - 
min_num_sub_groups, sub_group_count_for_ndrange - ) - } - - return local_size[0]; - #endif - } - - cl_int execute(const cl_kernel kernel, - const cl_mem output_buffer, - const cl_command_queue queue, - const size_t work_size, - const size_t work_group_size) - { - cl_int err; - // Get context from queue - cl_context context; - err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); - RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") - - // create temp buffer - auto temp_buffer = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * work_size, NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); - err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL); - RETURN_ON_CL_ERROR(err, "clSetKernelArg") - - err = clEnqueueNDRangeKernel( - queue, kernel, 1, - NULL, &work_size, &work_group_size, - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") - - err = clFinish(queue); - err |= clReleaseMemObject(temp_buffer); - - return err; - } -}; - -// ------------------------------------------------------------------------------ -// -------------------------- RUN TESTS ----------------------------------------- -// ------------------------------------------------------------------------------ -AUTO_TEST_CASE(test_work_group_named_barrier_spec_example) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ -#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))) - if(!is_extension_available(device, "cl_khr_subgroup_named_barrier")) - { - log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. 
Skipping tests.\n"); - return CL_SUCCESS; - } -#endif - - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - - RUN_WG_NAMED_BARRIER_TEST_MACRO(spec_example_work_group_named_barrier_test()) - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP diff --git a/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp deleted file mode 100644 index c7074ed026..0000000000 --- a/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp +++ /dev/null @@ -1,342 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP -#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP - -#include -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_sub_group_barrier { - -enum class barrier_type -{ - local, - global -}; - -struct test_options -{ - barrier_type barrier; - size_t max_count; - size_t num_tests; -}; - -const std::string source_common = R"( - // Circular shift of sub-group local ids - size_t get_shifted_local_id(int sub_group_local_id_delta) - { - const int sub_group_size = (int)get_sub_group_size(); - return (get_local_id(0) - get_sub_group_local_id()) + - (((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size; - } - - // Get global ids from shifted local ids - size_t get_shifted_global_id(int sub_group_local_id_delta) - { - return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta); - } -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #pragma OPENCL EXTENSION cl_khr_subgroups : enable - )"; - s << source_common; - if (options.barrier == barrier_type::global) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global long *output) - { - const size_t gid = get_shifted_global_id(0); - - output[gid] = gid; - sub_group_barrier(CLK_GLOBAL_MEM_FENCE); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_gid = get_shifted_global_id(i); - - output[other_gid] += other_gid; - sub_group_barrier(CLK_GLOBAL_MEM_FENCE); - - 
output[gid] += gid; - sub_group_barrier(CLK_GLOBAL_MEM_FENCE); - } - } - )"; - } - else if (options.barrier == barrier_type::local) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values) - { - const size_t gid = get_shifted_global_id(0); - const size_t lid = get_shifted_local_id(0); - - values[lid] = gid; - sub_group_barrier(CLK_LOCAL_MEM_FENCE); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_lid = get_shifted_local_id(i); - const size_t other_gid = get_shifted_global_id(i); - - values[other_lid] += other_gid; - sub_group_barrier(CLK_LOCAL_MEM_FENCE); - - values[lid] += gid; - sub_group_barrier(CLK_LOCAL_MEM_FENCE); - } - - output[gid] = values[lid]; - } - )"; - } - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #include - #include - #include - - using namespace cl; - - )"; - s << source_common; - - if (options.barrier == barrier_type::global) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global_ptr output) - { - const size_t gid = get_shifted_global_id(0); - - output[gid] = gid; - sub_group_barrier(mem_fence::global); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_gid = get_shifted_global_id(i); - - output[other_gid] += other_gid; - sub_group_barrier(mem_fence::global); - - output[gid] += gid; - sub_group_barrier(mem_fence::global); - } - } - )"; - } - else if (options.barrier == barrier_type::local) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global_ptr output, local_ptr values) - { - const size_t gid = get_shifted_global_id(0); - const size_t lid = get_shifted_local_id(0); - - values[lid] = gid; - sub_group_barrier(mem_fence::local); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_lid = get_shifted_local_id(i); - const size_t other_gid = get_shifted_global_id(i); - - values[other_lid] += other_gid; - 
sub_group_barrier(mem_fence::local); - - values[lid] += gid; - sub_group_barrier(mem_fence::local); - } - - output[gid] = values[lid]; - } - )"; - } - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - if (!is_extension_available(device, "cl_khr_subgroups")) - { - log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n"); - return CL_SUCCESS; - } -#endif - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - size_t max_work_group_size; - error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - - if (options.barrier == barrier_type::local) - { - cl_ulong kernel_local_mem_size; - error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, 
sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - - cl_ulong device_local_mem_size; - error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); - } - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution global_size_dis(1, options.max_count); - std::uniform_int_distribution local_size_dis(1, max_work_group_size); - std::uniform_int_distribution iter_dis(0, 20); - - for (size_t test = 0; test < options.num_tests; test++) - { - const size_t global_size = global_size_dis(gen); - const size_t local_size = local_size_dis(gen); - const size_t count = global_size; - - const int iter_lo = -iter_dis(gen); - const int iter_hi = +iter_dis(gen); - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - if (options.barrier == barrier_type::local) - { - error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - } - - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_long) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - 
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - error = clReleaseMemObject(output_buffer); - RETURN_ON_CL_ERROR(error, "clReleaseMemObject") - - for (size_t gid = 0; gid < count; gid++) - { - const long value = output[gid]; - const long expected = gid + 2 * gid * (iter_hi - iter_lo); - - if (value != expected) - { - RETURN_ON_ERROR_MSG(-1, - "Element %lu has incorrect value. Expected: %ld, got: %ld", - gid, expected, value - ); - } - } - } - - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_sub_group_barrier_global) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.barrier = barrier_type::global; - options.num_tests = 1000; - options.max_count = num_elements; - return test(device, context, queue, options); -} - -AUTO_TEST_CASE(test_sub_group_barrier_local) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.barrier = barrier_type::local; - options.num_tests = 1000; - options.max_count = num_elements; - return test(device, context, queue, options); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP diff --git a/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp deleted file mode 100644 index aa7fbd2081..0000000000 --- a/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp +++ /dev/null @@ -1,330 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP -#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP - -#include -#include -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_work_group_barrier { - -enum class barrier_type -{ - local, - global -}; - -struct test_options -{ - barrier_type barrier; - size_t max_count; - size_t num_tests; -}; - -const std::string source_common = R"( - // Circular shift of local ids - size_t get_shifted_local_id(int local_id_delta) - { - const int local_size = (int)get_local_size(0); - return (((int)get_local_id(0) + local_id_delta) % local_size + local_size) % local_size; - } - - // Get global ids from shifted local ids - size_t get_shifted_global_id(int local_id_delta) - { - return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(local_id_delta); - } -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_source(test_options options) -{ - std::stringstream s; - s << source_common; - if (options.barrier == barrier_type::global) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global long *output) - { - const size_t gid = get_shifted_global_id(0); - - output[gid] = gid; - 
work_group_barrier(CLK_GLOBAL_MEM_FENCE); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_gid = get_shifted_global_id(i); - - output[other_gid] += other_gid; - work_group_barrier(CLK_GLOBAL_MEM_FENCE); - - output[gid] += gid; - work_group_barrier(CLK_GLOBAL_MEM_FENCE); - } - } - )"; - } - else if (options.barrier == barrier_type::local) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values) - { - const size_t gid = get_shifted_global_id(0); - const size_t lid = get_shifted_local_id(0); - - values[lid] = gid; - work_group_barrier(CLK_LOCAL_MEM_FENCE); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_lid = get_shifted_local_id(i); - const size_t other_gid = get_shifted_global_id(i); - - values[other_lid] += other_gid; - work_group_barrier(CLK_LOCAL_MEM_FENCE); - - values[lid] += gid; - work_group_barrier(CLK_LOCAL_MEM_FENCE); - } - - output[gid] = values[lid]; - } - )"; - } - - return s.str(); -} -#else -std::string generate_source(test_options options) -{ - std::stringstream s; - s << R"( - #include - #include - #include - - using namespace cl; - - )"; - s << source_common; - - if (options.barrier == barrier_type::global) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global_ptr output) - { - const size_t gid = get_shifted_global_id(0); - - output[gid] = gid; - work_group_barrier(mem_fence::global); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_gid = get_shifted_global_id(i); - - output[other_gid] += other_gid; - work_group_barrier(mem_fence::global); - - output[gid] += gid; - work_group_barrier(mem_fence::global); - } - } - )"; - } - else if (options.barrier == barrier_type::local) - { - s << R"( - kernel void test(const int iter_lo, const int iter_hi, global_ptr output, local_ptr values) - { - const size_t gid = get_shifted_global_id(0); - const size_t lid = get_shifted_local_id(0); - - values[lid] = gid; - 
work_group_barrier(mem_fence::local); - - for (int i = iter_lo; i < iter_hi; i++) - { - const size_t other_lid = get_shifted_local_id(i); - const size_t other_gid = get_shifted_global_id(i); - - values[other_lid] += other_gid; - work_group_barrier(mem_fence::local); - - values[lid] += gid; - work_group_barrier(mem_fence::local); - } - - output[gid] = values[lid]; - } - )"; - } - - return s.str(); -} -#endif - -int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - std::string source = generate_source(options); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - size_t max_work_group_size; - error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - - if (options.barrier == barrier_type::local) - { - cl_ulong kernel_local_mem_size; - error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, 
sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") - - cl_ulong device_local_mem_size; - error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL); - RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") - - max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); - } - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution global_size_dis(1, options.max_count); - std::uniform_int_distribution local_size_dis(1, max_work_group_size); - std::uniform_int_distribution iter_dis(0, 20); - - for (size_t test = 0; test < options.num_tests; test++) - { - const size_t global_size = global_size_dis(gen); - const size_t local_size = local_size_dis(gen); - const size_t count = global_size; - - const int iter_lo = -iter_dis(gen); - const int iter_hi = +iter_dis(gen); - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, "clCreateBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - if (options.barrier == barrier_type::local) - { - error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - } - - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(cl_long) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - 
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - error = clReleaseMemObject(output_buffer); - RETURN_ON_CL_ERROR(error, "clReleaseMemObject") - - for (size_t gid = 0; gid < count; gid++) - { - const long value = output[gid]; - const long expected = gid + 2 * gid * (iter_hi - iter_lo); - - if (value != expected) - { - RETURN_ON_ERROR_MSG(-1, - "Element %lu has incorrect value. Expected: %ld, got: %ld", - gid, expected, value - ); - } - } - } - - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -AUTO_TEST_CASE(test_work_group_barrier_global) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.barrier = barrier_type::global; - options.num_tests = 1000; - options.max_count = num_elements; - return test(device, context, queue, options); -} - -AUTO_TEST_CASE(test_work_group_barrier_local) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.barrier = barrier_type::local; - options.num_tests = 1000; - options.max_count = num_elements; - return test(device, context, queue, options); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP diff --git a/test_conformance/clcpp/utils_common/errors.hpp b/test_conformance/clcpp/utils_common/errors.hpp deleted file mode 100644 index c1694626e9..0000000000 --- a/test_conformance/clcpp/utils_common/errors.hpp +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP - -#include - -#include "../harness/errorHelpers.h" - -// ------------- Check OpenCL error helpers (marcos) ----------------- - -std::string get_cl_error_string(cl_int error) -{ -#define CASE_CL_ERROR(x) case x: return #x; - switch (error) - { - CASE_CL_ERROR(CL_SUCCESS) - CASE_CL_ERROR(CL_DEVICE_NOT_FOUND) - CASE_CL_ERROR(CL_DEVICE_NOT_AVAILABLE) - CASE_CL_ERROR(CL_COMPILER_NOT_AVAILABLE) - CASE_CL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE) - CASE_CL_ERROR(CL_OUT_OF_RESOURCES) - CASE_CL_ERROR(CL_OUT_OF_HOST_MEMORY) - CASE_CL_ERROR(CL_PROFILING_INFO_NOT_AVAILABLE) - CASE_CL_ERROR(CL_MEM_COPY_OVERLAP) - CASE_CL_ERROR(CL_IMAGE_FORMAT_MISMATCH) - CASE_CL_ERROR(CL_IMAGE_FORMAT_NOT_SUPPORTED) - CASE_CL_ERROR(CL_BUILD_PROGRAM_FAILURE) - CASE_CL_ERROR(CL_MAP_FAILURE) - CASE_CL_ERROR(CL_MISALIGNED_SUB_BUFFER_OFFSET) - CASE_CL_ERROR(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) - CASE_CL_ERROR(CL_COMPILE_PROGRAM_FAILURE) - CASE_CL_ERROR(CL_LINKER_NOT_AVAILABLE) - CASE_CL_ERROR(CL_LINK_PROGRAM_FAILURE) - CASE_CL_ERROR(CL_DEVICE_PARTITION_FAILED) - CASE_CL_ERROR(CL_KERNEL_ARG_INFO_NOT_AVAILABLE) - - CASE_CL_ERROR(CL_INVALID_VALUE) - CASE_CL_ERROR(CL_INVALID_DEVICE_TYPE) - CASE_CL_ERROR(CL_INVALID_PLATFORM) - CASE_CL_ERROR(CL_INVALID_DEVICE) - CASE_CL_ERROR(CL_INVALID_CONTEXT) - CASE_CL_ERROR(CL_INVALID_QUEUE_PROPERTIES) - CASE_CL_ERROR(CL_INVALID_COMMAND_QUEUE) - CASE_CL_ERROR(CL_INVALID_HOST_PTR) - CASE_CL_ERROR(CL_INVALID_MEM_OBJECT) - 
CASE_CL_ERROR(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) - CASE_CL_ERROR(CL_INVALID_IMAGE_SIZE) - CASE_CL_ERROR(CL_INVALID_SAMPLER) - CASE_CL_ERROR(CL_INVALID_BINARY) - CASE_CL_ERROR(CL_INVALID_BUILD_OPTIONS) - CASE_CL_ERROR(CL_INVALID_PROGRAM) - CASE_CL_ERROR(CL_INVALID_PROGRAM_EXECUTABLE) - CASE_CL_ERROR(CL_INVALID_KERNEL_NAME) - CASE_CL_ERROR(CL_INVALID_KERNEL_DEFINITION) - CASE_CL_ERROR(CL_INVALID_KERNEL) - CASE_CL_ERROR(CL_INVALID_ARG_INDEX) - CASE_CL_ERROR(CL_INVALID_ARG_VALUE) - CASE_CL_ERROR(CL_INVALID_ARG_SIZE) - CASE_CL_ERROR(CL_INVALID_KERNEL_ARGS) - CASE_CL_ERROR(CL_INVALID_WORK_DIMENSION) - CASE_CL_ERROR(CL_INVALID_WORK_GROUP_SIZE) - CASE_CL_ERROR(CL_INVALID_WORK_ITEM_SIZE) - CASE_CL_ERROR(CL_INVALID_GLOBAL_OFFSET) - CASE_CL_ERROR(CL_INVALID_EVENT_WAIT_LIST) - CASE_CL_ERROR(CL_INVALID_EVENT) - CASE_CL_ERROR(CL_INVALID_OPERATION) - CASE_CL_ERROR(CL_INVALID_GL_OBJECT) - CASE_CL_ERROR(CL_INVALID_BUFFER_SIZE) - CASE_CL_ERROR(CL_INVALID_MIP_LEVEL) - CASE_CL_ERROR(CL_INVALID_GLOBAL_WORK_SIZE) - CASE_CL_ERROR(CL_INVALID_PROPERTY) - CASE_CL_ERROR(CL_INVALID_IMAGE_DESCRIPTOR) - CASE_CL_ERROR(CL_INVALID_COMPILER_OPTIONS) - CASE_CL_ERROR(CL_INVALID_LINKER_OPTIONS) - CASE_CL_ERROR(CL_INVALID_DEVICE_PARTITION_COUNT) - CASE_CL_ERROR(CL_INVALID_PIPE_SIZE) - CASE_CL_ERROR(CL_INVALID_DEVICE_QUEUE) - CASE_CL_ERROR(CL_INVALID_SPEC_ID) - CASE_CL_ERROR(CL_MAX_SIZE_RESTRICTION_EXCEEDED) - default: return "(unknown error code)"; - } -#undef CASE_CL_ERROR -} - -#define CHECK_ERROR(x) \ - if(x != CL_SUCCESS) \ - { \ - log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ - } -#define CHECK_ERROR_MSG(x, ...) \ - if(x != CL_SUCCESS) \ - { \ - log_error("ERROR: " __VA_ARGS__);\ - log_error("\n");\ - log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ - } -#define RETURN_ON_ERROR(x) \ - if(x != CL_SUCCESS) \ - { \ - log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ - return x;\ - } -#define RETURN_ON_ERROR_MSG(x, ...) 
\ - if(x != CL_SUCCESS) \ - { \ - log_error("ERROR: " __VA_ARGS__);\ - log_error("\n");\ - log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ - return x;\ - } - -#define RETURN_ON_CL_ERROR(x, cl_func_name) \ - if(x != CL_SUCCESS) \ - { \ - log_error("ERROR: %s failed: %s (%d)\n", cl_func_name, get_cl_error_string(x).c_str(), x);\ - log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ - return x;\ - } - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_ERRORS_HPP diff --git a/test_conformance/clcpp/utils_common/is_vector_type.hpp b/test_conformance/clcpp/utils_common/is_vector_type.hpp deleted file mode 100644 index 0232e51374..0000000000 --- a/test_conformance/clcpp/utils_common/is_vector_type.hpp +++ /dev/null @@ -1,60 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP - -#include "../common.hpp" - -// is_vector_type::value is true if Type is an OpenCL -// vector type; otherwise - false. 
-// -// Examples: -// * is_vector_type::value == false -// * is_vector_type::value == true -template -struct is_vector_type -{ - const static bool value = false; -}; - -#define ADD_VECTOR_TYPE(Type, n) \ - template<> \ - struct is_vector_type \ - { \ - const static bool value = true; \ - }; - -#define ADD_VECTOR_TYPES(Type) \ - ADD_VECTOR_TYPE(Type, 2) \ - ADD_VECTOR_TYPE(Type, 4) \ - ADD_VECTOR_TYPE(Type, 8) \ - ADD_VECTOR_TYPE(Type, 16) - -ADD_VECTOR_TYPES(cl_char) -ADD_VECTOR_TYPES(cl_uchar) -ADD_VECTOR_TYPES(cl_short) -ADD_VECTOR_TYPES(cl_ushort) -ADD_VECTOR_TYPES(cl_int) -ADD_VECTOR_TYPES(cl_uint) -ADD_VECTOR_TYPES(cl_long) -ADD_VECTOR_TYPES(cl_ulong) -ADD_VECTOR_TYPES(cl_float) -ADD_VECTOR_TYPES(cl_double) - -#undef ADD_VECTOR_TYPES -#undef ADD_VECTOR_TYPE - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/kernel_helpers.hpp b/test_conformance/clcpp/utils_common/kernel_helpers.hpp deleted file mode 100644 index 189b8238fb..0000000000 --- a/test_conformance/clcpp/utils_common/kernel_helpers.hpp +++ /dev/null @@ -1,50 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP - -#include "../common.hpp" - -// Creates a OpenCL C++/C program out_program and kernel out_kernel. 
-int create_opencl_kernel(cl_context context, - cl_program *out_program, - cl_kernel *out_kernel, - const char *source, - const std::string& kernel_name, - const std::string& build_options = "", - const bool openclCXX = true) -{ - return create_single_kernel_helper( - context, out_program, out_kernel, 1, &source, - kernel_name.c_str(), build_options.c_str(), openclCXX - ); -} - -int create_opencl_kernel(cl_context context, - cl_program *out_program, - cl_kernel *out_kernel, - const std::string& source, - const std::string& kernel_name, - const std::string& build_options = "", - const bool openclCXX = true) -{ - return create_opencl_kernel( - context, out_program, out_kernel, - source.c_str(), kernel_name, build_options, openclCXX - ); -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP diff --git a/test_conformance/clcpp/utils_common/make_vector_type.hpp b/test_conformance/clcpp/utils_common/make_vector_type.hpp deleted file mode 100644 index 11b11856b7..0000000000 --- a/test_conformance/clcpp/utils_common/make_vector_type.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP - -#include "../common.hpp" - -// Using scalar_type and i creates a type scalar_typei. 
-// -// Example: -// * make_vector_type::type is cl_uint8 -// * make_vector_type::type is cl_uint -template -struct make_vector_type -{ - typedef void type; -}; - -#define ADD_MAKE_VECTOR_TYPE(Type, n) \ - template<> \ - struct make_vector_type \ - { \ - typedef Type ## n type; \ - }; - -#define ADD_MAKE_VECTOR_TYPES(Type) \ - template<> \ - struct make_vector_type \ - { \ - typedef Type type; \ - }; \ - ADD_MAKE_VECTOR_TYPE(Type, 2) \ - ADD_MAKE_VECTOR_TYPE(Type, 3) \ - ADD_MAKE_VECTOR_TYPE(Type, 4) \ - ADD_MAKE_VECTOR_TYPE(Type, 8) \ - ADD_MAKE_VECTOR_TYPE(Type, 16) - -ADD_MAKE_VECTOR_TYPES(cl_char) -ADD_MAKE_VECTOR_TYPES(cl_uchar) -ADD_MAKE_VECTOR_TYPES(cl_short) -ADD_MAKE_VECTOR_TYPES(cl_ushort) -ADD_MAKE_VECTOR_TYPES(cl_int) -ADD_MAKE_VECTOR_TYPES(cl_uint) -ADD_MAKE_VECTOR_TYPES(cl_long) -ADD_MAKE_VECTOR_TYPES(cl_ulong) -ADD_MAKE_VECTOR_TYPES(cl_float) -ADD_MAKE_VECTOR_TYPES(cl_double) - -#undef ADD_MAKE_VECTOR_TYPES -#undef ADD_MAKE_VECTOR_TYPE - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/scalar_type.hpp b/test_conformance/clcpp/utils_common/scalar_type.hpp deleted file mode 100644 index 4c939bb2b3..0000000000 --- a/test_conformance/clcpp/utils_common/scalar_type.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP - -#include "../common.hpp" - -// scalar_type::type returns scalar type of Type. -// -// Examples: -// * scalar_type::type is cl_float -// * scalar_type::types is cl_float -template -struct scalar_type -{ - typedef void type; -}; - -#define ADD_VECTOR_TYPE(Type, n) \ - template<> \ - struct scalar_type \ - { \ - typedef Type type; \ - }; - -#define ADD_VECTOR_TYPES(Type) \ - template<> \ - struct scalar_type \ - { \ - typedef Type type; \ - }; \ - ADD_VECTOR_TYPE(Type, 2) \ - ADD_VECTOR_TYPE(Type, 4) \ - ADD_VECTOR_TYPE(Type, 8) \ - ADD_VECTOR_TYPE(Type, 16) - -ADD_VECTOR_TYPES(cl_char) -ADD_VECTOR_TYPES(cl_uchar) -ADD_VECTOR_TYPES(cl_short) -ADD_VECTOR_TYPES(cl_ushort) -ADD_VECTOR_TYPES(cl_int) -ADD_VECTOR_TYPES(cl_uint) -ADD_VECTOR_TYPES(cl_long) -ADD_VECTOR_TYPES(cl_ulong) -ADD_VECTOR_TYPES(cl_float) -ADD_VECTOR_TYPES(cl_double) - -#undef ADD_VECTOR_TYPES -#undef ADD_VECTOR_TYPE - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/string.hpp b/test_conformance/clcpp/utils_common/string.hpp deleted file mode 100644 index ad5ac9f086..0000000000 --- a/test_conformance/clcpp/utils_common/string.hpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP - - -#include -#include -#include -#include - -#include "is_vector_type.hpp" -#include "scalar_type.hpp" -#include "type_name.hpp" - -#include "../common.hpp" - - -template -std::string format_value(const type& value, - typename std::enable_if::value>::type* = 0) -{ - std::stringstream s; - s << type_name() << "{ "; - s << std::scientific << std::setprecision(6); - for (size_t j = 0; j < vector_size::value; j++) - { - if (j > 0) - s << ", "; - s << value.s[j]; - } - s << " }"; - return s.str(); -} - -template -std::string format_value(const type& value, - typename std::enable_if::value>::type* = 0) -{ - std::stringstream s; - s << type_name() << "{ "; - s << std::scientific << std::setprecision(6); - s << value; - s << " }"; - return s.str(); -} - -void replace_all(std::string& str, const std::string& from, const std::string& to) -{ - size_t start_pos = 0; - while((start_pos = str.find(from, start_pos)) != std::string::npos) { - str.replace(start_pos, from.length(), to); - start_pos += to.length(); - } -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP diff --git a/test_conformance/clcpp/utils_common/type_name.hpp b/test_conformance/clcpp/utils_common/type_name.hpp deleted file mode 100644 index c66f6e49e5..0000000000 --- a/test_conformance/clcpp/utils_common/type_name.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP - -#include "../common.hpp" - -// Returns type name (in OpenCL device). -// cl_uint - "uint", cl_float2 -> "float2" -template -std::string type_name() -{ - return "unknown"; -} - -#define ADD_TYPE_NAME(Type, str) \ - template<> \ - std::string type_name() \ - { \ - return #str; \ - } - -#define ADD_TYPE_NAME2(Type) \ - ADD_TYPE_NAME(cl_ ## Type, Type) - -#define ADD_TYPE_NAME3(Type, x) \ - ADD_TYPE_NAME2(Type ## x) - -#define ADD_TYPE_NAMES(Type) \ - ADD_TYPE_NAME2(Type) \ - ADD_TYPE_NAME3(Type, 2) \ - ADD_TYPE_NAME3(Type, 4) \ - ADD_TYPE_NAME3(Type, 8) \ - ADD_TYPE_NAME3(Type, 16) - -ADD_TYPE_NAMES(char) -ADD_TYPE_NAMES(uchar) -ADD_TYPE_NAMES(short) -ADD_TYPE_NAMES(ushort) -ADD_TYPE_NAMES(int) -ADD_TYPE_NAMES(uint) -ADD_TYPE_NAMES(long) -ADD_TYPE_NAMES(ulong) -ADD_TYPE_NAMES(float) -ADD_TYPE_NAMES(double) - -#undef ADD_TYPE_NAMES -#undef ADD_TYPE_NAME3 -#undef ADD_TYPE_NAME2 -#undef ADD_TYPE_NAME - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP diff --git a/test_conformance/clcpp/utils_common/type_supported.hpp b/test_conformance/clcpp/utils_common/type_supported.hpp deleted file mode 100644 index 8d4f721b46..0000000000 --- a/test_conformance/clcpp/utils_common/type_supported.hpp +++ /dev/null @@ -1,106 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP - -#include "../common.hpp" - -// Returns true if type is supported by device; otherwise - false; -template -bool type_supported(cl_device_id device) -{ - (void) device; - return false; -} - -#define ADD_SUPPORTED_TYPE(Type) \ - template<> \ - bool type_supported(cl_device_id device) \ - { \ - (void) device; \ - return true; \ - } - -ADD_SUPPORTED_TYPE(cl_char) -ADD_SUPPORTED_TYPE(cl_uchar) -ADD_SUPPORTED_TYPE(cl_short) -ADD_SUPPORTED_TYPE(cl_ushort) -ADD_SUPPORTED_TYPE(cl_int) -ADD_SUPPORTED_TYPE(cl_uint) - -// ulong -template<> -bool type_supported(cl_device_id device) -{ - // long types do not have to be supported in EMBEDDED_PROFILE. - char profile[128]; - int error; - - error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); - if (error != CL_SUCCESS) - { - log_error("ERROR: clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); - return false; - } - - if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) - return is_extension_available(device, "cles_khr_int64"); - - return true; -} -// long -template<> -bool type_supported(cl_device_id device) -{ - return type_supported(device); -} -ADD_SUPPORTED_TYPE(cl_float) -// double -template<> -bool type_supported(cl_device_id device) -{ - return is_extension_available(device, "cl_khr_fp64"); -} - -#define ADD_SUPPORTED_VEC_TYPE1(Type, n) \ - template<> \ - bool type_supported(cl_device_id device) \ - { \ - return type_supported(device); \ - } - -#define ADD_SUPPORTED_VEC_TYPE2(Type) \ - ADD_SUPPORTED_VEC_TYPE1(Type, 2) \ - ADD_SUPPORTED_VEC_TYPE1(Type, 4) \ - ADD_SUPPORTED_VEC_TYPE1(Type, 8) \ - ADD_SUPPORTED_VEC_TYPE1(Type, 16) - -ADD_SUPPORTED_VEC_TYPE2(cl_char) -ADD_SUPPORTED_VEC_TYPE2(cl_uchar) -ADD_SUPPORTED_VEC_TYPE2(cl_short) 
-ADD_SUPPORTED_VEC_TYPE2(cl_ushort) -ADD_SUPPORTED_VEC_TYPE2(cl_int) -ADD_SUPPORTED_VEC_TYPE2(cl_uint) -ADD_SUPPORTED_VEC_TYPE2(cl_long) -ADD_SUPPORTED_VEC_TYPE2(cl_ulong) -ADD_SUPPORTED_VEC_TYPE2(cl_float) -// ADD_SUPPORTED_VEC_TYPE2(cl_double) - -#undef ADD_SUPPORTED_VEC_TYPE2 -#undef ADD_SUPPORTED_VEC_TYPE1 -#undef ADD_SUPPORTED_TYPE - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP diff --git a/test_conformance/clcpp/utils_common/vector_size.hpp b/test_conformance/clcpp/utils_common/vector_size.hpp deleted file mode 100644 index 4817506e47..0000000000 --- a/test_conformance/clcpp/utils_common/vector_size.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP - -#include "../common.hpp" - -// Returns 1 if Type is a scalar type; otherwise if it's a vector type, -// it returns number of components in that Type. 
-template -struct vector_size -{ - const static size_t value = 1; -}; - -#define ADD_VECTOR_SIZE_TYPE(Type, n) \ - template<> \ - struct vector_size \ - { \ - const static size_t value = n; \ - }; - -#define ADD_VECTOR_SIZE_TYPES(Type) \ - template<> \ - struct vector_size \ - { \ - const static size_t value = 1; \ - }; \ - ADD_VECTOR_SIZE_TYPE(Type, 2) \ - ADD_VECTOR_SIZE_TYPE(Type, 4) \ - ADD_VECTOR_SIZE_TYPE(Type, 8) \ - ADD_VECTOR_SIZE_TYPE(Type, 16) - -ADD_VECTOR_SIZE_TYPES(cl_char) -ADD_VECTOR_SIZE_TYPES(cl_uchar) -ADD_VECTOR_SIZE_TYPES(cl_short) -ADD_VECTOR_SIZE_TYPES(cl_ushort) -ADD_VECTOR_SIZE_TYPES(cl_int) -ADD_VECTOR_SIZE_TYPES(cl_uint) -ADD_VECTOR_SIZE_TYPES(cl_long) -ADD_VECTOR_SIZE_TYPES(cl_ulong) -ADD_VECTOR_SIZE_TYPES(cl_float) -ADD_VECTOR_SIZE_TYPES(cl_double) - -#undef ADD_VECTOR_SIZE_TYPES -#undef ADD_VECTOR_SIZE_TYPE - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP diff --git a/test_conformance/clcpp/utils_test/binary.hpp b/test_conformance/clcpp/utils_test/binary.hpp deleted file mode 100644 index 893cbed09b..0000000000 --- a/test_conformance/clcpp/utils_test/binary.hpp +++ /dev/null @@ -1,305 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP - -#include -#include -#include -#include - -#include "../common.hpp" - -#include "detail/base_func_type.hpp" -#include "generate_inputs.hpp" -#include "compare.hpp" - -template -struct binary_func : public detail::base_func_type -{ - typedef IN1 in1_type; - typedef IN2 in2_type; - typedef OUT1 out_type; - - virtual ~binary_func() {}; - virtual std::string str() = 0; - - std::string decl_str() - { - return type_name() + "(" + type_name() + ", " + type_name() + ")"; - } - - bool is_in1_bool() - { - return false; - } - - bool is_in2_bool() - { - return false; - } - - IN1 min1() - { - return detail::get_min(); - } - - IN1 max1() - { - return detail::get_max(); - } - - IN2 min2() - { - return detail::get_min(); - } - - IN2 max2() - { - return detail::get_max(); - } - - std::vector in1_special_cases() - { - return { }; - } - - std::vector in2_special_cases() - { - return { }; - } - - template - typename make_vector_type::value>::type - delta(const IN1& in1, const IN2& in2, const T& expected) - { - typedef - typename make_vector_type::value>::type - delta_vector_type; - // Take care of unused variable warning - (void) in1; - (void) in2; - auto e = detail::make_value(1e-3); - return detail::multiply(e, expected); - } -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_binary(func_type func) -{ - std::string in1_value = "input1[gid]"; - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); - in1_value = "(input1[gid] != (int" + i + ")(0))"; - } - std::string in2_value = "input2[gid]"; - if(func.is_in2_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in2_value = "(input2[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")"; - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + "))"; - } - return - "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input1,\n" - " global " + type_name() + " *input2,\n" - " global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_binary(func_type func) -{ - std::string headers = func.headers(); - std::string in1_value = "input1[gid]"; - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in1_value = "(input1[gid] != (int" + i + ")(0))"; - } - std::string in2_value = "input2[gid]"; - if(func.is_in2_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in2_value = "(input2[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")"; - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); - function_call = "convert_cast(" + func.str() + "(" + in1_value + ", " + in2_value + "))"; - } - if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool()) - { - if(headers.find("#include ") == std::string::npos) - { - headers += "#include \n"; - } - } - return - "" + func.defs() + - "" + headers + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input1,\n" - " global_ptr<" + type_name() + "[]> input2,\n" - " global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#endif - -template -bool verify_binary(const std::vector &in1, - const std::vector &in2, - const std::vector &out, - binary_op op) -{ - for(size_t i = 0; i < in1.size(); i++) - { - auto expected = op(in1[i], in2[i]); - if(!are_equal(expected, out[i], op.delta(in1[i], in2[i], expected), op)) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - return true; -} - -template -int test_binary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, binary_op op) -{ - cl_mem buffers[3]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef typename binary_op::in1_type INPUT1; - typedef typename binary_op::in2_type INPUT2; - typedef typename binary_op::out_type OUTPUT; - - // Don't run test for unsupported types - if(!(type_supported(device) - && type_supported(device) - && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_binary(op); - std::string kernel_name = op.get_kernel_name(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only 
OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector in1_spec_cases = op.in1_special_cases(); - std::vector in2_spec_cases = op.in2_special_cases(); - prepare_special_cases(in1_spec_cases, in2_spec_cases); - std::vector input1 = generate_input(count, op.min1(), op.max1(), in1_spec_cases); - std::vector input2 = generate_input(count, op.min2(), op.max2(), in2_spec_cases); - std::vector output = generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT1) * input1.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT2) * input2.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(OUTPUT) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(), - static_cast(input1.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") - - err = clEnqueueWriteBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(), - static_cast(input2.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - err |= clSetKernelArg(kernel, 2, 
sizeof(buffers[2]), &buffers[2]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[2], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (!verify_binary(input1, input2, output, op)) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s %s(%s, %s) failed", op.str().c_str(), - type_name().c_str(), type_name().c_str(), type_name().c_str() - ); - } - log_info( - "test_%s %s(%s, %s) passed\n", op.str().c_str(), - type_name().c_str(), type_name().c_str(), type_name().c_str() - ); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseMemObject(buffers[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP diff --git a/test_conformance/clcpp/utils_test/compare.hpp b/test_conformance/clcpp/utils_test/compare.hpp deleted file mode 100644 index a22b88fd21..0000000000 --- a/test_conformance/clcpp/utils_test/compare.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP - -#include -#include -#include -#include - -#include - -#include "../common.hpp" - -// Checks if x is equal to y. -template -inline bool are_equal(const type& x, - const type& y, - const delta_type& delta, - op_type op, - typename std::enable_if< - is_vector_type::value - && std::is_integral::type>::value - >::type* = 0) -{ - (void) delta; - for(size_t i = 0; i < vector_size::value; i++) - { - if(op.is_out_bool()) - { - if(!((x.s[i] != 0) == (y.s[i] != 0))) - { - return false; - } - } - else if(!(x.s[i] == y.s[i])) - { - return false; - } - } - return true; -} - -template -inline bool are_equal(const type& x, - const type& y, - const delta_type& delta, - op_type op, - typename std::enable_if< - !is_vector_type::value - && std::is_integral::value - >::type* = 0) -{ - (void) delta; - if(op.is_out_bool()) - { - if(!((x != 0) == (y != 0))) - { - return false; - } - } - return x == y; -} - -template -inline bool are_equal(const type& x, - const type1& y, - const type2& delta, - op_type op, - typename std::enable_if< - !is_vector_type::value - && std::is_floating_point::value - >::type* = 0) -{ - // x - expected - // y - result - - // INFO: - // Whe don't care about subnormal values in OpenCL C++ tests - if(std::fpclassify(static_cast(x)) == FP_SUBNORMAL || std::fpclassify(y) == FP_SUBNORMAL) - { - return true; - } - - // both are NaN - if((std::isnan)(static_cast(x)) && (std::isnan)(y)) - { - return true; - } - // one is NaN - else if((std::isnan)(static_cast(x)) || (std::isnan)(y)) - { - return false; - } - - // Check for perfect match, it also covers inf, -inf - if(static_cast(x) != y) - { - // Check if values are close - if(std::abs(static_cast(x) - y) > (std::max)(std::numeric_limits::epsilon(), std::abs(delta))) - { - return false; - } - // Check ulp - if(op.use_ulp()) - { - return !(std::abs(Ulp_Error(x, y)) > op.ulp()); - } - } - return true; -} - 
-template -inline bool are_equal(const type& x, - const type1& y, - const type2& delta, - op_type op, - typename std::enable_if< - is_vector_type::value - && std::is_floating_point::type>::value - >::type* = 0) -{ - // x - expected - // y - result - for(size_t i = 0; i < vector_size::value; i++) - { - if(!are_equal(x.s[i], y.s[i], delta.s[i], op)) - { - return false; - } - } - return true; -} - -template -inline void print_error_msg(const type& expected, const type1& result, size_t i, func op) -{ - log_error( - "ERROR: test_%s %s failed. Error at %lu: Expected: %s, got: %s\n", - op.str().c_str(), - op.decl_str().c_str(), - i, - format_value(expected).c_str(), - format_value(result).c_str() - ); -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP diff --git a/test_conformance/clcpp/utils_test/detail/base_func_type.hpp b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp deleted file mode 100644 index 92e375d008..0000000000 --- a/test_conformance/clcpp/utils_test/detail/base_func_type.hpp +++ /dev/null @@ -1,112 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP - -#include -#include -#include -#include - -#include - -#include "../../common.hpp" - -#include "vec_helpers.hpp" - -namespace detail -{ - -template -struct base_func_type -{ - virtual ~base_func_type() {}; - - // Returns function name - virtual std::string str() = 0; - - // Returns name of the test kernel for that function - virtual std::string get_kernel_name() - { - std::string kn = this->str(); - replace_all(kn, "::", "_"); - return "test_" + kn; - } - - // Returns required defines and pragmas. - virtual std::string defs() - { - return ""; - } - - // Returns required OpenCL C++ headers. - virtual std::string headers() - { - return ""; - } - - // Return true if OUT1 type in OpenCL kernel should be treated - // as bool type; false otherwise. - bool is_out_bool() - { - return false; - } - - // Max ULP error, that is error should be raised when - // if Ulp_Error(result, expected) > ulp() - float ulp() - { - return 0.0f; - } - - // Should we check ULP error when verifing if the result is - // correct? - // - // (This effects how are_equal() function works, - // it may not have effect if verify() method in derived - // class does not use are_equal() function.) - // - // Only for FP numbers/vectors - bool use_ulp() - { - return true; - } - - // Max error. Error should be raised if - // abs(result - expected) > delta(.., expected) - // - // Default value: 0.001 * expected - // - // (This effects how are_equal() function works, - // it may not have effect if verify() method in derived - // class does not use are_equal() function.) 
- // - // Only for FP numbers/vectors - template - typename make_vector_type::value>::type - delta(const T& expected) - { - typedef - typename make_vector_type::value>::type - delta_vector_type; - auto e = detail::make_value(1e-3); - return detail::multiply(e, expected); - } -}; - -} // detail namespace - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP diff --git a/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp deleted file mode 100644 index 05df42aacd..0000000000 --- a/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP - -#include -#include -#include -#include - -#include - -#include "../../common.hpp" - -namespace detail -{ - -template -T make_value(typename scalar_type::type x, typename std::enable_if::value>::type* = 0) -{ - T value; - for(size_t i = 0; i < vector_size::value; i++) - { - value.s[i] = x; - } - return value; -} - -template -T make_value(T x, typename std::enable_if::value>::type* = 0) -{ - return x; -} - -template -result_type multiply(const IN1& x, const IN2& y, typename std::enable_if::value>::type* = 0) -{ - static_assert( - (vector_size::value == vector_size::value) - && (vector_size::value == vector_size::value), - "Vector sizes must be the same." - ); - typedef typename scalar_type::type SCALAR; - result_type value; - for(size_t i = 0; i < vector_size::value; i++) - { - value.s[i] = static_cast(x.s[i]) * static_cast(y.s[i]); - } - return value; -} - -template -result_type multiply(const IN1& x, const IN2& y, typename std::enable_if::value>::type* = 0) -{ - static_assert( - !is_vector_type::value && !is_vector_type::value, - "IN1 and IN2 must be scalar types" - ); - return static_cast(x) * static_cast(y); -} - -template -T get_min() -{ - typedef typename scalar_type::type SCALAR; - return make_value((std::numeric_limits::min)()); -} - -template -T get_max() -{ - typedef typename scalar_type::type SCALAR; - return make_value((std::numeric_limits::max)()); -} - -template -T get_part_max(typename scalar_type::type x) -{ - typedef typename scalar_type::type SCALAR; - return make_value((std::numeric_limits::max)() / x); -} - -template -T def_limit(typename scalar_type::type x) -{ - return make_value(x); -} - -} // detail namespace - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP diff --git a/test_conformance/clcpp/utils_test/generate_inputs.hpp b/test_conformance/clcpp/utils_test/generate_inputs.hpp deleted 
file mode 100644 index bb0d750656..0000000000 --- a/test_conformance/clcpp/utils_test/generate_inputs.hpp +++ /dev/null @@ -1,331 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP - -#include -#include -#include -#include - -#include - -#include "../common.hpp" - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - typename std::enable_if< - is_vector_type::value - && std::is_integral::type>::value - // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, - // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are - // not int types - && !(std::is_same::type, cl_uchar>::value - || std::is_same::type, cl_char>::value) - >::type* = 0) -{ - typedef typename scalar_type::type SCALAR; - const size_t vec_size = vector_size::value; - - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::vector> dists(vec_size); - for(size_t i = 0; i < vec_size; i++) - { - dists[i] = std::uniform_int_distribution(min.s[i], max.s[i]); - } - for(auto& i : input) - { - for(size_t j = 0; j < vec_size; j++) - { - i.s[j] = dists[j](gen); - } - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - 
return input; -} - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - typename std::enable_if< - is_vector_type::value - && std::is_integral::type>::value - // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, - // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are - // not int types - && (std::is_same::type, cl_uchar>::value - || std::is_same::type, cl_char>::value) - >::type* = 0) -{ - typedef typename scalar_type::type SCALAR; - const size_t vec_size = vector_size::value; - - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::vector> dists(vec_size); - for(size_t i = 0; i < vec_size; i++) - { - dists[i] = std::uniform_int_distribution( - static_cast(min.s[i]), - static_cast(max.s[i]) - ); - } - for(auto& i : input) - { - for(size_t j = 0; j < vec_size; j++) - { - i.s[j] = static_cast(dists[j](gen)); - } - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - typename std::enable_if< - !is_vector_type::value - && std::is_integral::value - // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, - // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are - // not int types - && !(std::is_same::value || std::is_same::value) - >::type* = 0) -{ - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution dis(min, max); - for(auto& i : input) - { - i = dis(gen); - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector 
special_cases, - typename std::enable_if< - !is_vector_type::value - && std::is_integral::value - // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, - // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are - // not int types - && (std::is_same::value || std::is_same::value) - >::type* = 0) -{ - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution dis( - static_cast(min), static_cast(max) - ); - for(auto& i : input) - { - i = static_cast(dis(gen)); - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - typename std::enable_if< - is_vector_type::value - && std::is_floating_point::type>::value - >::type* = 0) -{ - typedef typename scalar_type::type SCALAR; - const size_t vec_size = vector_size::value; - - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::vector> dists(vec_size); - for(size_t i = 0; i < vec_size; i++) - { - // Fatal error - if(std::fpclassify(max.s[i]) == FP_SUBNORMAL || std::fpclassify(min.s[i]) == FP_SUBNORMAL) - { - log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n"); - } - dists[i] = std::uniform_real_distribution(min.s[i], max.s[i]); - } - for(auto& i : input) - { - for(size_t j = 0; j < vec_size; j++) - { - SCALAR x = dists[j](gen); - while(std::fpclassify(x) == FP_SUBNORMAL) - { - x = dists[j](gen); - } - i.s[j] = x; - } - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - -template -std::vector generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - typename std::enable_if< - !is_vector_type::value - && std::is_floating_point::value - >::type* = 0) -{ - 
// Fatal error - if(std::fpclassify(max) == FP_SUBNORMAL || std::fpclassify(min) == FP_SUBNORMAL) - { - log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n"); - } - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(min, max); - for(auto& i : input) - { - type x = dis(gen); - while(std::fpclassify(x) == FP_SUBNORMAL) - { - x = dis(gen); - } - i = x; - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - -template -std::vector generate_output(size_t count, - typename scalar_type::type svalue = typename scalar_type::type(0), - typename std::enable_if::value>::type* = 0) -{ - type value; - for(size_t i = 0; i < vector_size::value; i++) - value.s[i] = svalue; - return std::vector(count, value); -} - -template -std::vector generate_output(size_t count, - type svalue = type(0), - typename std::enable_if::value>::type* = 0) -{ - return std::vector(count, svalue); -} - -template -void prepare_special_cases(std::vector& in1_spec_cases, std::vector& in2_spec_cases) -{ - if(in1_spec_cases.empty() || in2_spec_cases.empty()) - { - return; - } - - size_t new_size = in1_spec_cases.size() * in2_spec_cases.size(); - std::vector new_in1(new_size); - std::vector new_in2(new_size); - for(size_t i = 0; i < in1_spec_cases.size(); i++) - { - for(size_t j = 0; j < in2_spec_cases.size(); j++) - { - new_in1[(i * in2_spec_cases.size()) + j] = in1_spec_cases[i]; - new_in2[(i * in2_spec_cases.size()) + j] = in2_spec_cases[j]; - } - } - in1_spec_cases = new_in1; - in2_spec_cases = new_in2; -} - -template -void prepare_special_cases(std::vector& in1_spec_cases, - std::vector& in2_spec_cases, - std::vector& in3_spec_cases) -{ - if(in3_spec_cases.empty()) - { - return prepare_special_cases(in1_spec_cases, in2_spec_cases); - } - else if (in2_spec_cases.empty()) - { - return prepare_special_cases(in1_spec_cases, in3_spec_cases); - } - else 
if (in1_spec_cases.empty()) - { - return prepare_special_cases(in2_spec_cases, in3_spec_cases); - } - - size_t new_size = in1_spec_cases.size() * in2_spec_cases.size() * in3_spec_cases.size(); - std::vector new_in1(new_size); - std::vector new_in2(new_size); - std::vector new_in3(new_size); - for(size_t i = 0; i < in1_spec_cases.size(); i++) - { - for(size_t j = 0; j < in2_spec_cases.size(); j++) - { - for(size_t k = 0; k < in3_spec_cases.size(); k++) - { - size_t idx = - (i * in2_spec_cases.size() * in3_spec_cases.size()) - + (j * in3_spec_cases.size()) - + k; - new_in1[idx] = in1_spec_cases[i]; - new_in2[idx] = in2_spec_cases[j]; - new_in3[idx] = in3_spec_cases[k]; - } - } - } - in1_spec_cases = new_in1; - in2_spec_cases = new_in2; - in3_spec_cases = new_in3; -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP diff --git a/test_conformance/clcpp/utils_test/ternary.hpp b/test_conformance/clcpp/utils_test/ternary.hpp deleted file mode 100644 index 2a6f6b551a..0000000000 --- a/test_conformance/clcpp/utils_test/ternary.hpp +++ /dev/null @@ -1,364 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP - -#include -#include -#include -#include - -#include "../common.hpp" - -#include "detail/base_func_type.hpp" -#include "generate_inputs.hpp" -#include "compare.hpp" - -template -struct ternary_func : public detail::base_func_type -{ - typedef IN1 in1_type; - typedef IN2 in2_type; - typedef IN3 in3_type; - typedef OUT1 out_type; - - virtual ~ternary_func() {}; - virtual std::string str() = 0; - - std::string decl_str() - { - return type_name() + "(" + type_name() + ", " + type_name()+ ", " + type_name() + ")"; - } - - bool is_in1_bool() - { - return false; - } - - bool is_in2_bool() - { - return false; - } - - bool is_in3_bool() - { - return false; - } - - IN1 min1() - { - return detail::get_min(); - } - - IN1 max1() - { - return detail::get_max(); - } - - IN2 min2() - { - return detail::get_min(); - } - - IN2 max2() - { - return detail::get_max(); - } - - IN3 min3() - { - return detail::get_min(); - } - - IN3 max3() - { - return detail::get_max(); - } - - std::vector in1_special_cases() - { - return { }; - } - - std::vector in2_special_cases() - { - return { }; - } - - std::vector in3_special_cases() - { - return { }; - } - - template - typename make_vector_type::value>::type - delta(const IN1& in1, const IN2& in2, const IN3& in3, const T& expected) - { - typedef - typename make_vector_type::value>::type - delta_vector_type; - // Take care of unused variable warning - (void) in1; - (void) in2; - (void) in3; - auto e = detail::make_value(1e-3); - return detail::multiply(e, expected); - } -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string 
generate_kernel_ternary(func_type func) -{ - std::string in1_value = "input1[gid]"; - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in1_value = "(input1[gid] != (int" + i + ")(0))"; - } - std::string in2_value = "input2[gid]"; - if(func.is_in2_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in2_value = "(input2[gid] != (int" + i + ")(0))"; - } - std::string in3_value = "input3[gid]"; - if(func.is_in3_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in3_value = "(input3[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")"; - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))"; - } - return - "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input1,\n" - " global " + type_name() + " *input2,\n" - " global " + type_name() + " *input3,\n" - " global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_ternary(func_type func) -{ - std::string headers = func.headers(); - std::string in1_value = "input1[gid]"; - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in1_value = "(input1[gid] != (int" + i + ")(0))"; - } - std::string in2_value = "input2[gid]"; - if(func.is_in2_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in2_value = "(input2[gid] != (int" + i + ")(0))"; - } - std::string in3_value = "input3[gid]"; - if(func.is_in3_bool()) - { - std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); - in3_value = "(input3[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")"; - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - function_call = "convert_cast(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))"; - } - if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool() || func.is_in3_bool()) - { - if(headers.find("#include ") == std::string::npos) - { - headers += "#include \n"; - } - } - return - "" + func.defs() + - "" + headers + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input1,\n" - " global_ptr<" + type_name() + "[]> input2,\n" - " global_ptr<" + type_name() + "[]> input3,\n" - " global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#endif - -template -bool verify_ternary(const std::vector &in1, - const std::vector &in2, - const std::vector &in3, - const std::vector &out, - ternary_op op) -{ - for(size_t i = 0; i < in1.size(); i++) - { - auto expected = op(in1[i], in2[i], in3[i]); - if(!are_equal(expected, out[i], op.delta(in1[i], in2[i], in3[i], expected), op)) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - return true; -} - -template -int test_ternary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ternary_op op) -{ - cl_mem buffers[4]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef typename ternary_op::in1_type INPUT1; - typedef typename ternary_op::in2_type INPUT2; - typedef typename ternary_op::in3_type INPUT3; - typedef typename ternary_op::out_type OUTPUT; - - // Don't run test for unsupported types - if(!(type_supported(device) - && 
type_supported(device) - && type_supported(device) - && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_ternary(op); - std::string kernel_name = op.get_kernel_name(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector in1_spec_cases = op.in1_special_cases(); - std::vector in2_spec_cases = op.in2_special_cases(); - std::vector in3_spec_cases = op.in3_special_cases(); - prepare_special_cases(in1_spec_cases, in2_spec_cases, in3_spec_cases); - std::vector input1 = generate_input(count, op.min1(), op.max1(), in1_spec_cases); - std::vector input2 = generate_input(count, op.min2(), op.max2(), in2_spec_cases); - std::vector input3 = generate_input(count, op.min3(), op.max3(), in3_spec_cases); - std::vector output = generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT1) * input1.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT2) * input2.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - 
sizeof(INPUT3) * input3.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(OUTPUT) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(), - static_cast(input1.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(), - static_cast(input2.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[2], CL_TRUE, 0, sizeof(INPUT3) * input3.size(), - static_cast(input3.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]); - err |= clSetKernelArg(kernel, 3, sizeof(buffers[3]), &buffers[3]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[3], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (!verify_ternary(input1, input2, input3, output, op)) - { - RETURN_ON_ERROR_MSG(-1, - "test_%s %s(%s, %s, %s) failed", op.str().c_str(), - type_name().c_str(), - type_name().c_str(), - type_name().c_str(), - type_name().c_str() - ); - } - log_info( - "test_%s %s(%s, %s, %s) passed\n", op.str().c_str(), - type_name().c_str(), - type_name().c_str(), - type_name().c_str(), - type_name().c_str() - ); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - 
clReleaseMemObject(buffers[2]); - clReleaseMemObject(buffers[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP diff --git a/test_conformance/clcpp/utils_test/unary.hpp b/test_conformance/clcpp/utils_test/unary.hpp deleted file mode 100644 index 456ad3f02f..0000000000 --- a/test_conformance/clcpp/utils_test/unary.hpp +++ /dev/null @@ -1,259 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP -#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP - -#include -#include -#include -#include - -#include "../common.hpp" - -#include "detail/base_func_type.hpp" -#include "generate_inputs.hpp" -#include "compare.hpp" - -template -struct unary_func : public detail::base_func_type -{ - typedef IN1 in_type; - typedef OUT1 out_type; - - virtual ~unary_func() {}; - virtual std::string str() = 0; - - // Return string with function type, for example: int(float). - std::string decl_str() - { - return type_name() + "(" + type_name() + ")"; - } - - // Return true if IN1 type in OpenCL kernel should be treated - // as bool type; false otherwise. - bool is_in1_bool() - { - return false; - } - - // Return min value that can be used as a first argument. - IN1 min1() - { - return detail::get_min(); - } - - // Return max value that can be used as a first argument. 
- IN1 max1() - { - return detail::get_max(); - } - - // This returns a list of special cases input values we want to - // test. - std::vector in_special_cases() - { - return { }; - } - - // Max error. Error should be raised if - // abs(result - expected) > delta(.., expected) - // - // Default value: 0.001 * expected - // - // (This effects how are_equal() function works, - // it may not have effect if verify() method in derived - // class does not use are_equal() function.) - // - // Only for FP numbers/vectors - template - typename make_vector_type::value>::type - delta(const IN1& in1, const T& expected) - { - typedef - typename make_vector_type::value>::type - delta_vector_type; - // Take care of unused variable warning - (void) in1; - auto e = detail::make_value(1e-3); - return detail::multiply(e, expected); - } -}; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_unary(func_type func) -{ - std::string in1_value = "input[gid]"; - // Convert uintN to boolN values - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in1_value = "(input[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ");"; - // Convert boolN result of funtion func_type to uintN - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); - function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + "))"; - } - return - "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#else -template -std::string generate_kernel_unary(func_type func) -{ - std::string headers = func.headers(); - std::string in1_value = "input[gid]"; - if(func.is_in1_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - in1_value = "(input[gid] != (int" + i + ")(0))"; - } - std::string function_call = func.str() + "(" + in1_value + ")"; - if(func.is_out_bool()) - { - std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); - function_call = "convert_cast(" + func.str() + "(" + in1_value + "))"; - } - if(func.is_out_bool() || func.is_in1_bool()) - { - if(headers.find("#include ") == std::string::npos) - { - headers += "#include \n"; - } - } - return - "" + func.defs() + - "" + headers + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input," - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + function_call + ";\n" - "}\n"; -} -#endif - -template -bool verify_unary(const std::vector &in, const std::vector &out, unary_op op) -{ - for(size_t i = 0; i < in.size(); i++) - { - auto expected = op(in[i]); - if(!are_equal(expected, out[i], op.delta(in[i], expected), op)) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - return true; -} - -template -int test_unary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, unary_op op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef 
typename unary_op::in_type INPUT; - typedef typename unary_op::out_type OUTPUT; - - // Don't run test for unsupported types - if(!(type_supported(device) && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_unary(op); - std::string kernel_name = op.get_kernel_name(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); - std::vector output = generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(OUTPUT) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer") - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, 
"clSetKernelArg"); - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (!verify_unary(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); - } - log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP diff --git a/test_conformance/clcpp/vload_vstore/CMakeLists.txt b/test_conformance/clcpp/vload_vstore/CMakeLists.txt deleted file mode 100644 index c66cb6f75c..0000000000 --- a/test_conformance/clcpp/vload_vstore/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_VLOAD_VSTORE_FUNCS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/vload_vstore/common.hpp b/test_conformance/clcpp/vload_vstore/common.hpp deleted file mode 100644 index d78d765411..0000000000 --- a/test_conformance/clcpp/vload_vstore/common.hpp +++ /dev/null @@ -1,82 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP - -#include -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include "half_utils.hpp" -#include - -// Generates cl_half input -std::vector generate_half_input(size_t count, - const cl_float& min, - const cl_float& max, - const std::vector special_cases) -{ - std::vector input(count); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(min, max); - for(auto& i : input) - { - i = cl_half_from_float(dis(gen), CL_HALF_RTE); - } - - input.insert(input.begin(), special_cases.begin(), special_cases.end()); - input.resize(count); - return input; -} - -// Generates input for vload_vstore tests, we can't just simply use function -// generate_input(...), because cl_half is typedef of cl_short (but generating -// cl_shorts and generating cl_halfs are different operations). -template -std::vector vload_vstore_generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - const bool generate_half, - typename std::enable_if< - std::is_same::value - >::type* = 0) -{ - if(!generate_half) - { - return generate_input(count, min, max, special_cases); - } - return generate_half_input(count, -(CL_HALF_MAX/4.f), (CL_HALF_MAX/4.f), special_cases); -} - -// If !std::is_same::value, we can just use generate_input(...). 
-template -std::vector vload_vstore_generate_input(size_t count, - const type& min, - const type& max, - const std::vector special_cases, - const bool generate_half, - typename std::enable_if< - !std::is_same::value - >::type* = 0) -{ - return generate_input(count, min, max, special_cases); -} - -#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP diff --git a/test_conformance/clcpp/vload_vstore/half_utils.hpp b/test_conformance/clcpp/vload_vstore/half_utils.hpp deleted file mode 100644 index ce7ae822ce..0000000000 --- a/test_conformance/clcpp/vload_vstore/half_utils.hpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP -#define TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include - -namespace detail -{ - -template -inline int clz(INT_TYPE x) -{ - int count = 0; - if(std::is_unsigned::value) - { - cl_ulong value = x; - value <<= 8 * sizeof(value) - (8 * sizeof(x)); - for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) - { - value <<= 1; - } - } - else - { - cl_long value = x; - value <<= 8 * sizeof(value) - (8 * sizeof(x)); - for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) - { - value <<= 1; - } - } - return count; -} - -} // namespace detail - -#endif // TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP diff --git a/test_conformance/clcpp/vload_vstore/main.cpp b/test_conformance/clcpp/vload_vstore/main.cpp deleted file mode 100644 index e5c4fdd062..0000000000 --- a/test_conformance/clcpp/vload_vstore/main.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "../common.hpp" - -#include "vload_funcs.hpp" -#include "vstore_funcs.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/vload_vstore/vload_funcs.hpp b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp deleted file mode 100644 index cb9415e0b8..0000000000 --- a/test_conformance/clcpp/vload_vstore/vload_funcs.hpp +++ /dev/null @@ -1,367 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP - -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include "common.hpp" - -#include - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_vload(func_type func) -{ - std::string input1_type_str = type_name(); - if(func.is_in1_half()) - { - input1_type_str = "half"; - } - std::string output1_type_str = type_name(); - if(N == 3) - { - output1_type_str[output1_type_str.size() - 1] = '3'; - } - return - "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + func.str() + std::to_string(N) + "(gid, input);\n" - "}\n"; -} -#else -template -std::string generate_kernel_vload(func_type func) -{ - std::string input1_type_str = type_name(); - if(func.is_in1_half()) - { - input1_type_str = "half"; - } - std::string output1_type_str = type_name(); - if(N == 3) - { - output1_type_str[output1_type_str.size() - 1] = '3'; - } - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_" + func.str() + "(global_ptr<" + input1_type_str + "[]> input," - "global_ptr<" + output1_type_str + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " output[gid] = " + func.str() + "<" + std::to_string(N) + ">(gid, input.get());\n" - "}\n"; -} -#endif - -template -bool verify_vload(const std::vector &in, const std::vector &out, vload_op op) -{ - for(size_t i = 0; i < out.size(); i++) - { - auto expected = op(i, 
in.begin()); - for(size_t j = 0; j < vload_op::vector_size; j++) - { - size_t idx = (i * vector_size::value) + j; - if(!are_equal(expected.s[j], out[i].s[j], op.delta(in[idx], expected.s[j]), op)) - { - print_error_msg(expected, out[i], i, op); - return false; - } - } - } - return true; -} - -template -int test_vload_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef typename vload_op::in_type INPUT; - typedef typename vload_op::out_type OUTPUT; - - // Don't run test for unsupported types - if(!(type_supported(device) && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_vload(op); - std::string kernel_name("test_"); kernel_name += op.str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector input = vload_vstore_generate_input( - count * vector_size::value, op.min1(), op.max1(), op.in_special_cases(), op.is_in1_half() - ); - std::vector output = generate_output(count); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT) * 
input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(OUTPUT) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (!verify_vload(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", - op.str().c_str(), - type_name().c_str(), - type_name().c_str() - ); - } - log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -template -struct vload_func : public unary_func< - IN1, - typename make_vector_type::type /* create IN1N type */ - > -{ - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - - std::string str() - { - return "vload"; - } - - std::string headers() - { - return "#include \n"; - } - - template - result_type operator()(const size_t offset, Iterator x) - { - static_assert( - !is_vector_type::value, - "IN1 must be scalar type" - ); - static_assert( - std::is_same::value_type, IN1>::value, - "std::iterator_traits::value_type must be IN1" - ); - - typedef typename 
std::iterator_traits::difference_type diff_type; - - result_type r; - Iterator temp = x + static_cast(offset * N); - for(size_t i = 0; i < N; i++) - { - r.s[i] = *temp; - temp++; - } - return r; - } - - bool is_in1_half() - { - return false; - } -}; - -template -struct vload_half_func : public unary_func< - cl_half, - typename make_vector_type::type /* create IN1N type */ - > -{ - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - - std::string str() - { - return "vload_half"; - } - - std::string headers() - { - return "#include \n"; - } - - template - result_type operator()(const size_t offset, Iterator x) - { - static_assert( - std::is_same::value_type, cl_half>::value, - "std::iterator_traits::value_type must be cl_half" - ); - - typedef typename std::iterator_traits::difference_type diff_type; - - result_type r; - Iterator temp = x + static_cast(offset * N); - for(size_t i = 0; i < N; i++) - { - r.s[i] = cl_half_to_float(*temp); - temp++; - } - return r; - } - - bool is_in1_half() - { - return true; - } -}; - -template -struct vloada_half_func : public unary_func< - cl_half, - typename make_vector_type::type /* create IN1N type */ - > -{ - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - - std::string str() - { - return "vloada_half"; - } - - std::string headers() - { - return "#include \n"; - } - - template - result_type operator()(const size_t offset, Iterator x) - { - static_assert( - std::is_same::value_type, cl_half>::value, - "std::iterator_traits::value_type must be cl_half" - ); - - typedef typename std::iterator_traits::difference_type diff_type; - - result_type r; - size_t alignment = N == 3 ? 
4 : N; - Iterator temp = x + static_cast(offset * alignment); - for(size_t i = 0; i < N; i++) - { - r.s[i] = cl_half_to_float(*temp); - temp++; - } - return r; - } - - bool is_in1_half() - { - return true; - } -}; - -AUTO_TEST_CASE(test_vload_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#define TEST_VLOAD_FUNC_MACRO(CLASS) \ - last_error = test_vload_func( \ - device, context, queue, n_elems, CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - - TEST_VLOAD_FUNC_MACRO((vload_func())) - TEST_VLOAD_FUNC_MACRO((vload_func())) - TEST_VLOAD_FUNC_MACRO((vload_func())) - TEST_VLOAD_FUNC_MACRO((vload_func())) - - TEST_VLOAD_FUNC_MACRO((vload_half_func<2>())) - TEST_VLOAD_FUNC_MACRO((vload_half_func<3>())) - TEST_VLOAD_FUNC_MACRO((vload_half_func<4>())) - TEST_VLOAD_FUNC_MACRO((vload_half_func<8>())) - TEST_VLOAD_FUNC_MACRO((vload_half_func<16>())) - - TEST_VLOAD_FUNC_MACRO((vloada_half_func<2>())) - TEST_VLOAD_FUNC_MACRO((vloada_half_func<3>())) - TEST_VLOAD_FUNC_MACRO((vloada_half_func<4>())) - TEST_VLOAD_FUNC_MACRO((vloada_half_func<8>())) - TEST_VLOAD_FUNC_MACRO((vloada_half_func<16>())) - -#undef TEST_VLOAD_FUNC_MACRO - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP diff --git a/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp deleted file mode 100644 index 7ffc584ea4..0000000000 --- a/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp +++ /dev/null @@ -1,349 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP -#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP - -#include - -#include "../common.hpp" -#include "../funcs_test_utils.hpp" - -#include "common.hpp" - -#include - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_kernel_vstore(func_type func) -{ - std::string input1_type_str = type_name(); - if(N == 3) - { - input1_type_str[input1_type_str.size() - 1] = '3'; - } - std::string output1_type_str = type_name(); - if(func.is_out_half()) - { - output1_type_str = "half"; - } - return - "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " " + func.str() + std::to_string(N) + "(input[gid], gid, output);\n" - "}\n"; -} -#else -template -std::string generate_kernel_vstore(func_type func) -{ - std::string input1_type_str = type_name(); - if(N == 3) - { - input1_type_str[input1_type_str.size() - 1] = '3'; - } - std::string output1_type_str = type_name(); - if(func.is_out_half()) - { - output1_type_str = "half"; - } - return - "" + func.defs() + - "" + func.headers() + - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_" + 
func.str() + "(global_ptr<" + input1_type_str + "[]> input," - "global_ptr<" + output1_type_str + "[]> output)\n" - "{\n" - " size_t gid = get_global_id(0);\n" - " " + func.str() + "(input[gid], gid, output.get());\n" - "}\n"; -} -#endif - -template -bool verify_vstore(const std::vector &in, const std::vector &out, vload_op op) -{ - for(size_t i = 0; i < in.size(); i++) - { - auto expected = op(in[i]); - for(size_t j = 0; j < vload_op::vector_size; j++) - { - size_t idx = (i * vload_op::vec_alignment) + j; - if(!are_equal(expected.s[j], out[idx], op.delta(in[i], expected).s[j], op)) - { - print_error_msg(expected.s[j], out[idx], idx, op); - return false; - } - } - } - return true; -} - -template -int test_vstore_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t work_size[1]; - int err; - - typedef typename vload_op::in_type INPUT; - typedef typename vload_op::out_type OUTPUT; - - // Don't run test for unsupported types - if(!(type_supported(device) && type_supported(device))) - { - return CL_SUCCESS; - } - - std::string code_str = generate_kernel_vstore(op); - std::string kernel_name("test_"); kernel_name += op.str(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); - 
RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); - RETURN_ON_ERROR(err) -#endif - - std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); - std::vector output = generate_output(count * vector_size::value); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(INPUT) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(OUTPUT) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - work_size[0] = count; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (!verify_vstore(input, output, op)) - { - RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); - } - log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -template -struct vstore_func : public unary_func< - typename make_vector_type::type, - T - > -{ - typedef typename make_vector_type::type input1_type; - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - 
const static size_t vec_alignment = N; - - std::string str() - { - return "vstore"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input1_type& in) - { - static_assert( - !is_vector_type::value, - "T must be scalar type" - ); - return in; - } - - bool is_out_half() - { - return false; - } -}; - -template -struct vstore_half_func : public unary_func< - typename make_vector_type::type, - cl_half - > -{ - typedef typename make_vector_type::type input1_type; - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - const static size_t vec_alignment = N; - - std::string str() - { - return "vstore_half"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input1_type& in) - { - result_type r; - for(size_t i = 0; i < N; i++) - { - r.s[i] = cl_half_from_float(in.s[i], CL_HALF_RTE); - } - return r; - } - - input1_type min1() - { - return detail::make_value(-512.f); - } - - input1_type max1() - { - return detail::make_value(512.f); - } - - bool is_out_half() - { - return true; - } -}; - -template -struct vstorea_half_func : public unary_func< - typename make_vector_type::type, - cl_half - > -{ - typedef typename make_vector_type::type input1_type; - typedef typename make_vector_type::type result_type; - const static size_t vector_size = N; - const static size_t vec_alignment = N == 3 ? 
4 : N; - - std::string str() - { - return "vstorea_half"; - } - - std::string headers() - { - return "#include \n"; - } - - result_type operator()(const input1_type& in) - { - result_type r; - for(size_t i = 0; i < N; i++) - { - r.s[i] = cl_half_from_float(in.s[i], CL_HALF_RTE); - } - return r; - } - - input1_type min1() - { - return detail::make_value(-512.f); - } - - input1_type max1() - { - return detail::make_value(512.f); - } - - bool is_out_half() - { - return true; - } -}; - -AUTO_TEST_CASE(test_vstore_funcs) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int last_error = CL_SUCCESS; - -#define TEST_VSTORE_FUNC_MACRO(CLASS) \ - last_error = test_vstore_func( \ - device, context, queue, n_elems, CLASS \ - ); \ - CHECK_ERROR(last_error) \ - error |= last_error; - - TEST_VSTORE_FUNC_MACRO((vstore_func())) - TEST_VSTORE_FUNC_MACRO((vstore_func())) - TEST_VSTORE_FUNC_MACRO((vstore_func())) - TEST_VSTORE_FUNC_MACRO((vstore_func())) - TEST_VSTORE_FUNC_MACRO((vstore_func())) - - TEST_VSTORE_FUNC_MACRO((vstore_half_func<2>())) - TEST_VSTORE_FUNC_MACRO((vstore_half_func<3>())) - TEST_VSTORE_FUNC_MACRO((vstore_half_func<4>())) - TEST_VSTORE_FUNC_MACRO((vstore_half_func<8>())) - TEST_VSTORE_FUNC_MACRO((vstore_half_func<16>())) - - TEST_VSTORE_FUNC_MACRO((vstorea_half_func<2>())) - TEST_VSTORE_FUNC_MACRO((vstorea_half_func<3>())) - -#undef TEST_VSTORE_FUNC_MACRO - - if(error != CL_SUCCESS) - { - return -1; - } - return error; -} - -#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP diff --git a/test_conformance/clcpp/workgroups/CMakeLists.txt b/test_conformance/clcpp/workgroups/CMakeLists.txt deleted file mode 100644 index 812e982e3f..0000000000 --- a/test_conformance/clcpp/workgroups/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_WORKGROUPS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git 
a/test_conformance/clcpp/workgroups/common.hpp b/test_conformance/clcpp/workgroups/common.hpp deleted file mode 100644 index ab7b100d9f..0000000000 --- a/test_conformance/clcpp/workgroups/common.hpp +++ /dev/null @@ -1,97 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP -#define TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP - -#include -#include -#include - -enum class work_group_op : int { - add, min, max -}; - -std::string to_string(work_group_op op) -{ - switch (op) - { - case work_group_op::add: - return "add"; - case work_group_op::min: - return "min"; - case work_group_op::max: - return "max"; - default: - break; - } - return ""; -} - -template -std::vector generate_input(size_t count, size_t wg_size) -{ - std::vector input(count, CL_INT_TYPE(1)); - switch (op) - { - case work_group_op::add: - return input; - case work_group_op::min: - { - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j--; - if(j == 0) - { - j = wg_size; - } - } - } - break; - case work_group_op::max: - { - size_t j = 0; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j++; - if(j == wg_size) - { - j = 0; - } - } - } - } - return input; -} - -template -std::vector generate_output(size_t count, size_t wg_size) -{ - switch (op) - { - case work_group_op::add: - return std::vector(count, CL_INT_TYPE(0)); - case 
work_group_op::min: - return std::vector(count, (std::numeric_limits::max)()); - case work_group_op::max: - return std::vector(count, (std::numeric_limits::min)()); - } - return std::vector(count, CL_INT_TYPE(0)); -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP diff --git a/test_conformance/clcpp/workgroups/main.cpp b/test_conformance/clcpp/workgroups/main.cpp deleted file mode 100644 index 924bb44c3a..0000000000 --- a/test_conformance/clcpp/workgroups/main.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "../common.hpp" - -#include "test_wg_all.hpp" -#include "test_wg_any.hpp" -#include "test_wg_broadcast.hpp" -#include "test_wg_reduce.hpp" -#include "test_wg_scan_inclusive.hpp" -#include "test_wg_scan_exclusive.hpp" - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/workgroups/test_wg_all.hpp b/test_conformance/clcpp/workgroups/test_wg_all.hpp deleted file mode 100644 index 35ee521710..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_all.hpp +++ /dev/null @@ -1,220 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_wg_all_kernel_code() -{ - return - "__kernel void test_wg_all(global uint *input, global uint *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - "\n" - " int result = work_group_all(input[tid] < input[tid+1]);\n" - " if(result == 0) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} -#else -std::string generate_wg_all_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_all(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " bool result = work_group_all(input[tid] < input[tid+1]);\n" - " if(!result) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} -#endif - -int verify_wg_all(const std::vector &in, const std::vector &out, size_t count, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < count; i += wg_size) - { - // Work-group all - bool all = true; - for (j = 0; j < ((count - i) > 
wg_size ? wg_size : (count - i)); j++) - { - if(!(in[i+j] < in[i+j+1])) - { - all = false; - break; - } - } - - // Convert bool to uint - cl_uint all_uint = all ? 1 : 0; - // Check if all work-items in work-group stored correct value - for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++) - { - if (all_uint != out[i + j]) - { - log_info( - "work_group_all %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(all_uint), - static_cast(out[i + j])); - return -1; - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_wg_all(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(i); - // In one place in ~half of workgroups input[tid] < input[tid+1] will - // generate false, that means for that workgroups work_group_all() - // should return false - if((j == wg_size/2) && (i > count/2)) - { - input[i] = input[i - 1]; - } - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_wg_all(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_wg_all_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of 
OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input_wg_all(flat_work_size + 1, wg_size); - std::vector output = generate_output_wg_all(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS) - { - 
RETURN_ON_ERROR_MSG(-1, "work_group_all failed"); - } - log_info("work_group_all passed\n"); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_all) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int err = CL_SUCCESS; - - err = work_group_all(device, context, queue, n_elems); - CHECK_ERROR(err) - - if(err != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_any.hpp b/test_conformance/clcpp/workgroups/test_wg_any.hpp deleted file mode 100644 index 1ceb1ef685..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_any.hpp +++ /dev/null @@ -1,220 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_wg_any_kernel_code() -{ - return - "__kernel void test_wg_any(global uint *input, global uint *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - "\n" - " int result = work_group_any(input[tid] == input[tid+1]);\n" - " if(result == 0) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} -#else -std::string generate_wg_any_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_any(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " bool result = work_group_any(input[tid] == input[tid+1]);\n" - " if(!result) {\n output[tid] = 0;\n return;\n }\n" - " output[tid] = 1;\n" - "}\n"; -} -#endif - -int verify_wg_any(const std::vector &in, const std::vector &out, size_t count, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < count; i += wg_size) - { - // Work-group any - bool any = false; - for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++) - { - if(in[i+j] == in[i+j+1]) - { - any = true; - break; - } - } - - // Convert bool to uint - cl_uint any_uint = any ? 1 : 0; - // Check if all work-items in work-group stored correct value - for (j = 0; j < ((count - i) > wg_size ? 
wg_size : (count - i)); j++) - { - if (any_uint != out[i + j]) - { - log_info( - "work_group_any %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(any_uint), - static_cast(out[i + j])); - return -1; - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_wg_any(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(i); - // In one place in ~half of workgroups input[tid] == input[tid+1] will - // generate true, that means for that workgroups work_group_any() - // should return true - if((j == wg_size/2) && (i > count/2)) - { - input[i] = input[i - 1]; - } - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_wg_any(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_wg_any_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, 
&program, &kernel, code_str, "test_wg_any"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input_wg_any(flat_work_size + 1, wg_size); - std::vector output = generate_output_wg_any(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_wg_any(input, output, flat_work_size, wg_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "work_group_any failed"); - } - log_info("work_group_any passed\n"); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_any) 
-(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int err = CL_SUCCESS; - - err = work_group_any(device, context, queue, n_elems); - CHECK_ERROR(err) - - if(err != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp deleted file mode 100644 index 999aef192b..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp +++ /dev/null @@ -1,460 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -std::string generate_wg_broadcast_1D_kernel_code() -{ - return - "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n" - " output[tid] = result;\n" - "}\n"; -} -std::string generate_wg_broadcast_2D_kernel_code() -{ - return - "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" - "{\n" - " ulong tid_x = get_global_id(0);\n" - " ulong tid_y = get_global_id(1);\n" - " size_t x = get_group_id(0) % get_local_size(0);\n" - " size_t y = get_group_id(1) % get_local_size(1);\n" - " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n" - " uint result = work_group_broadcast(input[idx], x, y);\n" - " output[idx] = result;\n" - "}\n"; -} -std::string generate_wg_broadcast_3D_kernel_code() -{ - return - "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" - "{\n" - " ulong tid_x = get_global_id(0);\n" - " ulong tid_y = get_global_id(1);\n" - " ulong tid_z = get_global_id(2);\n" - " size_t x = get_group_id(0) % get_local_size(0);\n" - " size_t y = get_group_id(1) % get_local_size(1);\n" - " size_t z = get_group_id(2) % get_local_size(2);\n" - " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n" - " uint result = 
work_group_broadcast(input[idx], x, y, z);\n" - " output[idx] = result;\n" - "}\n"; -} -#else -std::string generate_wg_broadcast_1D_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n" - " output[tid] = result;\n" - "}\n"; -} -std::string generate_wg_broadcast_2D_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid_x = get_global_id(0);\n" - " ulong tid_y = get_global_id(1);\n" - " size_t x = get_group_id(0) % get_local_size(0);\n" - " size_t y = get_group_id(1) % get_local_size(1);\n" - " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n" - " uint result = work_group_broadcast(input[idx], x, y);\n" - " output[idx] = result;\n" - "}\n"; -} -std::string generate_wg_broadcast_3D_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" - "{\n" - " ulong tid_x = get_global_id(0);\n" - " ulong tid_y = get_global_id(1);\n" - " ulong tid_z = get_global_id(2);\n" - " size_t x = get_group_id(0) % get_local_size(0);\n" - " size_t y = get_group_id(1) % get_local_size(1);\n" - " size_t z = get_group_id(2) % get_local_size(2);\n" - " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n" - " uint result = work_group_broadcast(input[idx], x, y, z);\n" - " output[idx] = result;\n" - "}\n"; -} -#endif - -int -verify_wg_broadcast_1D(const std::vector &in, const std::vector &out, size_t n, size_t wg_size) -{ - size_t i, j; - size_t group_id; - - for (i=0,group_id=0; i wg_size ? 
wg_size : (n-i); - cl_uint broadcast_result = in[i + (group_id % local_size)]; - for (j=0; j &in, const std::vector &out, - size_t nx, size_t ny, - size_t wg_size_x, size_t wg_size_y) -{ - size_t i, j, _i, _j; - size_t group_id_x, group_id_y; - - for (i=0,group_id_y=0; i wg_size_y ? wg_size_y : (ny-i); - for (_i=0; _i < local_size_y; _i++) - { - for (j=0,group_id_x=0; j wg_size_x ? wg_size_x : (nx-j); - cl_uint broadcast_result = in[(i + y) * nx + (j + x)]; - for (_j=0; _j < local_size_x; _j++) - { - size_t indx = (i + _i) * nx + (j + _j); - if ( broadcast_result != out[indx] ) - { - log_info("%lu\n", indx); - log_info("%lu\n", ((i + y) * nx + (j + x))); - log_info("%lu\n", out.size()); - log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n", j+_j, i+_i, broadcast_result, out[indx]); - return -1; - } - } - } - } - } - - return CL_SUCCESS; -} - -int -verify_wg_broadcast_3D(const std::vector &in, const std::vector &out, - size_t nx, size_t ny, size_t nz, - size_t wg_size_x, size_t wg_size_y, size_t wg_size_z) -{ - size_t i, j, k, _i, _j, _k; - size_t group_id_x, group_id_y, group_id_z; - - for (i=0,group_id_z=0; i wg_size_z ? wg_size_z : (nz-i); - for (_i=0; _i < local_size_z; _i++) - { - for (j=0,group_id_y=0; j wg_size_y ? wg_size_y : (ny-j); - for (_j=0; _j < local_size_y; _j++) - { - for (k=0,group_id_x=0; k wg_size_x ? 
wg_size_x : (nx-k); - cl_uint broadcast_result = in[(i + z) * ny * nz + (j + y) * nx + (k + x)]; - for (_k=0; _k < local_size_x; _k++) - { - size_t indx = (i + _i) * ny * nx + (j + _j) * nx + (k + _k); - if ( broadcast_result != out[indx] ) - { - log_info( - "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n", - k+_k, j+_j, i+_i, - broadcast_result, out[indx]); - return -1; - } - } - } - } - } - } - } - return CL_SUCCESS; -} - -std::vector generate_input_wg_broadcast(size_t count, size_t wg_size) -{ - std::vector input(count, cl_uint(0)); - size_t j = wg_size; - for(size_t i = 0; i < count; i++) - { - input[i] = static_cast(j); - j--; - if(j == 0) - { - j = wg_size; - } - } - return input; -} - -std::vector generate_output_wg_broadcast(size_t count, size_t wg_size) -{ - (void) wg_size; - return std::vector(count, cl_uint(1)); -} - -int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim) -{ - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t flat_wg_size; - size_t wg_size[] = { 1, 1, 1}; - size_t work_size[] = { 1, 1, 1}; - int err; - - // Get kernel source code - std::string code_str; - if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code(); - else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code(); - else code_str = generate_wg_broadcast_1D_kernel_code(); - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) 
&& defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast"); - RETURN_ON_ERROR(err) -#endif - - // Get max flat workgroup size - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Set local work size - wg_size[0] = flat_wg_size; - if(dim > 2) - { - if (flat_wg_size >=512) - { - wg_size[0] = wg_size[1] = wg_size[2] = 8; - } - else if (flat_wg_size >= 64) - { - wg_size[0] = wg_size[1] = wg_size[2] = 4; - } - else if (flat_wg_size >= 8) - { - wg_size[0] = wg_size[1] = wg_size[2] = 2; - } - else - { - wg_size[0] = wg_size[1] = wg_size[2] = 1; - } - } - else if(dim > 1) - { - if (flat_wg_size >= 256) - { - wg_size[0] = wg_size[1] = 16; - } - else if (flat_wg_size >=64) - { - wg_size[0] = wg_size[1] = 8; - } - else if (flat_wg_size >= 16) - { - wg_size[0] = wg_size[1] = 4; - } - else - { - wg_size[0] = wg_size[1] = 1; - } - } - - // Calculate flat local work size - flat_wg_size = wg_size[0]; - if(dim > 1) flat_wg_size *= wg_size[1]; - if(dim > 2) flat_wg_size *= wg_size[2]; - - // Calculate global work size - size_t flat_work_size = count; - // 3D - if(dim > 2) - { - size_t wg_number = static_cast( - std::ceil(static_cast(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2])) - ); - work_size[0] = wg_number * wg_size[0]; - work_size[1] = wg_number * wg_size[1]; - work_size[2] = wg_number * wg_size[2]; - flat_work_size = work_size[0] * work_size[1] * work_size[2]; - } - // 2D - else if(dim > 1) - { - size_t wg_number = static_cast( - std::ceil(static_cast(count / 2) / (wg_size[0] * wg_size[1])) - ); - work_size[0] = wg_number * wg_size[0]; - work_size[1] = wg_number * wg_size[1]; - flat_work_size = work_size[0] * work_size[1]; - } - // 1D - else - { - 
size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size[0]) - ); - flat_work_size = wg_number * wg_size[0]; - work_size[0] = flat_work_size; - } - - std::vector input = generate_input_wg_broadcast(flat_work_size, flat_wg_size); - std::vector output = generate_output_wg_broadcast(flat_work_size, flat_wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uint) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - int result = CL_SUCCESS; - // 3D - if(dim > 2) - { - result = verify_wg_broadcast_3D( - input, output, - work_size[0], work_size[1], work_size[2], - wg_size[0], wg_size[1], wg_size[2] - ); - } - // 2D - else if(dim > 1) - { - result = verify_wg_broadcast_2D( - input, output, - work_size[0], work_size[1], - wg_size[0], wg_size[1] - ); - } - // 1D - else - { - result = verify_wg_broadcast_1D( - input, output, - work_size[0], - wg_size[0] - ); - } - - RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim); - log_info("work_group_broadcast_%luD passed\n", dim); - - clReleaseMemObject(buffers[0]); - 
clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_broadcast) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_broadcast(device, context, queue, n_elems, 1); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_broadcast(device, context, queue, n_elems, 2); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_broadcast(device, context, queue, n_elems, 3); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp deleted file mode 100644 index 160b2e8655..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp +++ /dev/null @@ -1,334 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_wg_reduce_kernel_code() -{ - return - "__kernel void test_wg_reduce(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - "\n" - " " + type_name() + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#else -template -std::string generate_wg_reduce_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_reduce(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = work_group_reduce(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#endif - -template -int verify_wg_reduce_add(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE sum = 0; - // Work-group sum - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - sum += in[i + j]; - - // Check if all work-items in work-group stored correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - if (sum != out[i + j]) - { - log_info( - "work_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j])); - return -1; - } - } - } - return 0; -} - -template -int verify_wg_reduce_min(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - // Work-group min - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - min = std::min(min, in[i + j]); - - // Check if all work-items in work-group stored correct value - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - { - if (min != out[i + j]) - { - log_info( - "work_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j])); - return -1; - } - } - } - return 0; -} - -template -int verify_wg_reduce_max(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - // Work-group max - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - max = std::max(max, in[i + j]); - - // Check if all work-items in work-group stored correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - if (max != out[i + j]) - { - log_info( - "work_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j])); - return -1; - } - } - } - return 0; -} - -template -int verify_wg_reduce(const std::vector &in, const std::vector &out, size_t wg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_wg_reduce_add(in, out, wg_size); - case work_group_op::min: - return verify_wg_reduce_min(in, out, wg_size); - case work_group_op::max: - return verify_wg_reduce_max(in, out, wg_size); - } - return -1; -} - -template -int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_wg_reduce_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, 
sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_wg_reduce(input, output, wg_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_reduce_add) -(cl_device_id device, cl_context context, cl_command_queue 
queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_reduce_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_reduce_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_reduce(device, context, queue, n_elems); - 
CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp deleted file mode 100644 index ef0e8ffc30..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp +++ /dev/null @@ -1,327 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP - -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_wg_scan_exclusive_kernel_code() -{ - return - "__kernel void test_wg_scan_exclusive(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - "\n" - " " + type_name() + " result = work_group_scan_exclusive_" + to_string(op) + "(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#else -template -std::string generate_wg_scan_exclusive_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_scan_exclusive(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = work_group_scan_exclusive(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#endif - -template -int verify_wg_scan_exclusive_add(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE sum = 0; - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - if (sum != out[i + j]) - { - log_info( - "work_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j])); - return -1; - } - sum += in[i + j]; - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_exclusive_min(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - { - if (min != out[i + j]) - { - log_info( - "work_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j])); - return -1; - } - min = (std::min)(min, in[i + j]); - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_exclusive_max(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - if (max != out[i + j]) - { - log_info( - "work_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j])); - return -1; - } - max = (std::max)(max, in[i + j]); - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_exclusive(const std::vector &in, const std::vector &out, size_t wg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_wg_scan_exclusive_add(in, out, wg_size); - case work_group_op::min: - return verify_wg_scan_exclusive_min(in, out, wg_size); - case work_group_op::max: - return verify_wg_scan_exclusive_max(in, out, wg_size); - } - return -1; -} - -template -int work_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_wg_scan_exclusive_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, 
"test_wg_scan_exclusive"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_wg_scan_exclusive(input, output, wg_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "work_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("work_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - 
clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_scan_exclusive_add) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_scan_exclusive_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_scan_exclusive_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, 
context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_exclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp deleted file mode 100644 index 5623aed7f8..0000000000 --- a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp +++ /dev/null @@ -1,327 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP -#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP - -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" -// Common for tests of work-group functions -#include "common.hpp" - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -template -std::string generate_wg_scan_inclusive_kernel_code() -{ - return - "__kernel void test_wg_scan_inclusive(global " + type_name() + " *input, global " + type_name() + " *output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - "\n" - " " + type_name() + " result = work_group_scan_inclusive_" + to_string(op) + "(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#else -template -std::string generate_wg_scan_inclusive_kernel_code() -{ - return "#include \n" - "#include \n" - "#include \n" - "using namespace cl;\n" - "__kernel void test_wg_scan_inclusive(global_ptr<" + type_name() + "[]> input, " - "global_ptr<" + type_name() + "[]> output)\n" - "{\n" - " ulong tid = get_global_id(0);\n" - " " + type_name() + " result = work_group_scan_inclusive(input[tid]);\n" - " output[tid] = result;\n" - "}\n"; -} -#endif - -template -int verify_wg_scan_inclusive_add(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE sum = 0; - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - sum += in[i + j]; - if (sum != out[i + j]) - { - log_info( - "work_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(sum), - static_cast(out[i + j])); - return -1; - } - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_inclusive_min(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE min = (std::numeric_limits::max)(); - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) - { - min = (std::min)(min, in[i + j]); - if (min != out[i + j]) - { - log_info( - "work_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(min), - static_cast(out[i + j])); - return -1; - } - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_inclusive_max(const std::vector &in, const std::vector &out, size_t wg_size) -{ - size_t i, j; - for (i = 0; i < in.size(); i += wg_size) - { - CL_INT_TYPE max = (std::numeric_limits::min)(); - - // Check if all work-items in work-group wrote correct value - for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) - { - max = (std::max)(max, in[i + j]); - if (max != out[i + j]) - { - log_info( - "work_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", - type_name().c_str(), - i + j, - static_cast(max), - static_cast(out[i + j])); - return -1; - } - } - } - return CL_SUCCESS; -} - -template -int verify_wg_scan_inclusive(const std::vector &in, const std::vector &out, size_t wg_size) -{ - switch (op) - { - case work_group_op::add: - return verify_wg_scan_inclusive_add(in, out, wg_size); - case work_group_op::min: - return verify_wg_scan_inclusive_min(in, out, wg_size); - case work_group_op::max: - return verify_wg_scan_inclusive_max(in, out, wg_size); - } - return -1; -} - -template -int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) -{ - // don't run test for unsupported types - if(!type_supported(device)) - { - return CL_SUCCESS; - } - - cl_mem buffers[2]; - cl_program program; - cl_kernel kernel; - size_t wg_size; - size_t work_size[1]; - int err; - - std::string code_str = generate_wg_scan_inclusive_kernel_code(); -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive"); - RETURN_ON_ERROR(err) - return err; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false); - RETURN_ON_ERROR(err) -#else - err = create_opencl_kernel(context, &program, &kernel, code_str, 
"test_wg_scan_inclusive"); - RETURN_ON_ERROR(err) -#endif - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); - RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") - - // Calculate global work size - size_t flat_work_size; - size_t wg_number = static_cast( - std::ceil(static_cast(count) / wg_size) - ); - flat_work_size = wg_number * wg_size; - work_size[0] = flat_work_size; - - std::vector input = generate_input(flat_work_size, wg_size); - std::vector output = generate_output(flat_work_size, wg_size); - - buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * input.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - buffers[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(CL_INT_TYPE) * output.size(), NULL, &err); - RETURN_ON_CL_ERROR(err, "clCreateBuffer"); - - err = clEnqueueWriteBuffer( - queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), - static_cast(input.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); - - err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); - err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); - RETURN_ON_CL_ERROR(err, "clSetKernelArg"); - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); - - err = clEnqueueReadBuffer( - queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), - static_cast(output.data()), 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); - - if (verify_wg_scan_inclusive(input, output, wg_size) != CL_SUCCESS) - { - RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); - } - log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); - - clReleaseMemObject(buffers[0]); - clReleaseMemObject(buffers[1]); - 
clReleaseKernel(kernel); - clReleaseProgram(program); - return err; -} - -AUTO_TEST_CASE(test_work_group_scan_inclusive_add) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_scan_inclusive_min) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -AUTO_TEST_CASE(test_work_group_scan_inclusive_max) -(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - int error = CL_SUCCESS; - int local_error = CL_SUCCESS; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, 
context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - local_error = work_group_scan_inclusive(device, context, queue, n_elems); - CHECK_ERROR(local_error) - error |= local_error; - - if(error != CL_SUCCESS) - return -1; - return CL_SUCCESS; -} - -#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP diff --git a/test_conformance/clcpp/workitems/CMakeLists.txt b/test_conformance/clcpp/workitems/CMakeLists.txt deleted file mode 100644 index 00359334fb..0000000000 --- a/test_conformance/clcpp/workitems/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(MODULE_NAME CPP_WORKITEMS) - -set(${MODULE_NAME}_SOURCES - main.cpp -) - -include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/workitems/main.cpp b/test_conformance/clcpp/workitems/main.cpp deleted file mode 100644 index aacbdd4973..0000000000 --- a/test_conformance/clcpp/workitems/main.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "../common.hpp" - -#include "test_workitems.hpp" - - -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} diff --git a/test_conformance/clcpp/workitems/test_workitems.hpp b/test_conformance/clcpp/workitems/test_workitems.hpp deleted file mode 100644 index 099ef34485..0000000000 --- a/test_conformance/clcpp/workitems/test_workitems.hpp +++ /dev/null @@ -1,417 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP -#define TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP - -#include -#include -#include - -// Common for all OpenCL C++ tests -#include "../common.hpp" - - -namespace test_workitems { - -struct test_options -{ - bool uniform_work_group_size; - size_t max_count; - size_t num_tests; -}; - -struct output_type -{ - cl_uint work_dim; - cl_ulong global_size[3]; - cl_ulong global_id[3]; - cl_ulong local_size[3]; - cl_ulong enqueued_local_size[3]; - cl_ulong local_id[3]; - cl_ulong num_groups[3]; - cl_ulong group_id[3]; - cl_ulong global_offset[3]; - cl_ulong global_linear_id; - cl_ulong local_linear_id; - cl_ulong sub_group_size; - cl_ulong max_sub_group_size; - cl_ulong num_sub_groups; - cl_ulong enqueued_num_sub_groups; - cl_ulong sub_group_id; - cl_ulong sub_group_local_id; -}; - -const std::string source_common = R"( -struct output_type -{ - uint work_dim; - ulong global_size[3]; - ulong global_id[3]; - ulong local_size[3]; - ulong enqueued_local_size[3]; - ulong local_id[3]; - ulong num_groups[3]; - ulong group_id[3]; - ulong global_offset[3]; - ulong global_linear_id; - ulong local_linear_id; - ulong sub_group_size; - ulong max_sub_group_size; - ulong num_sub_groups; - ulong enqueued_num_sub_groups; - ulong sub_group_id; - ulong sub_group_local_id; -}; -)"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) -const std::string source = - source_common + - R"( - #ifdef cl_khr_subgroups - #pragma OPENCL EXTENSION cl_khr_subgroups : enable - #endif - - kernel void test(global struct output_type *output) - { - const ulong gid = get_global_linear_id(); - output[gid].work_dim = get_work_dim(); - for (uint dimindx = 0; dimindx < 3; dimindx++) - { - 
output[gid].global_size[dimindx] = get_global_size(dimindx); - output[gid].global_id[dimindx] = get_global_id(dimindx); - output[gid].local_size[dimindx] = get_local_size(dimindx); - output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); - output[gid].local_id[dimindx] = get_local_id(dimindx); - output[gid].num_groups[dimindx] = get_num_groups(dimindx); - output[gid].group_id[dimindx] = get_group_id(dimindx); - output[gid].global_offset[dimindx] = get_global_offset(dimindx); - } - output[gid].global_linear_id = get_global_linear_id(); - output[gid].local_linear_id = get_local_linear_id(); - #ifdef cl_khr_subgroups - output[gid].sub_group_size = get_sub_group_size(); - output[gid].max_sub_group_size = get_max_sub_group_size(); - output[gid].num_sub_groups = get_num_sub_groups(); - output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); - output[gid].sub_group_id = get_sub_group_id(); - output[gid].sub_group_local_id = get_sub_group_local_id(); - #endif - } - )"; -#else -const std::string source = - R"( - #include - #include - using namespace cl; - )" + - source_common + - R"( - - kernel void test(global_ptr output) - { - const size_t gid = get_global_linear_id(); - output[gid].work_dim = get_work_dim(); - for (uint dimindx = 0; dimindx < 3; dimindx++) - { - output[gid].global_size[dimindx] = get_global_size(dimindx); - output[gid].global_id[dimindx] = get_global_id(dimindx); - output[gid].local_size[dimindx] = get_local_size(dimindx); - output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); - output[gid].local_id[dimindx] = get_local_id(dimindx); - output[gid].num_groups[dimindx] = get_num_groups(dimindx); - output[gid].group_id[dimindx] = get_group_id(dimindx); - output[gid].global_offset[dimindx] = get_global_offset(dimindx); - } - output[gid].global_linear_id = get_global_linear_id(); - output[gid].local_linear_id = get_local_linear_id(); - output[gid].sub_group_size = get_sub_group_size(); - 
output[gid].max_sub_group_size = get_max_sub_group_size(); - output[gid].num_sub_groups = get_num_sub_groups(); - output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); - output[gid].sub_group_id = get_sub_group_id(); - output[gid].sub_group_local_id = get_sub_group_local_id(); - } - - )"; -#endif - -#define CHECK_EQUAL(result, expected, func_name) \ - if (result != expected) \ - { \ - RETURN_ON_ERROR_MSG(-1, \ - "Function %s failed. Expected: %s, got: %s", func_name, \ - format_value(expected).c_str(), format_value(result).c_str() \ - ); \ - } - -#define CHECK(expression, func_name) \ - if (expression) \ - { \ - RETURN_ON_ERROR_MSG(-1, \ - "Function %s returned incorrect result", func_name \ - ); \ - } - -int test_workitems(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) -{ - int error = CL_SUCCESS; - - cl_program program; - cl_kernel kernel; - - std::string kernel_name = "test"; - -// ----------------------------------------------------------------------------------- -// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ -// ----------------------------------------------------------------------------------- -// Only OpenCL C++ to SPIR-V compilation -#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) - return error; -// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) -#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name, "-cl-std=CL2.0", false - ); - RETURN_ON_ERROR(error) -// Normal run -#else - error = create_opencl_kernel( - context, &program, &kernel, - source, kernel_name - ); - RETURN_ON_ERROR(error) -#endif - - size_t max_work_group_size; - size_t max_local_sizes[3]; - error = get_max_allowed_work_group_size(context, kernel, 
&max_work_group_size, max_local_sizes); - RETURN_ON_ERROR(error) - - bool check_sub_groups = true; - bool check_sub_groups_limits = true; -#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) - check_sub_groups = false; - check_sub_groups_limits = false; - if (is_extension_available(device, "cl_khr_subgroups")) - { - Version version = get_device_cl_version(device); - RETURN_ON_ERROR(error) - check_sub_groups_limits = (version >= Version(2,1)); // clGetKernelSubGroupInfo is from 2.1 - check_sub_groups = true; - } -#endif - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution count_dis(1, options.max_count); - - for (int test = 0; test < options.num_tests; test++) - { - for (size_t dim = 1; dim <= 3; dim++) - { - size_t global_size[3] = { 1, 1, 1 }; - size_t global_offset[3] = { 0, 0, 0 }; - size_t enqueued_local_size[3] = { 1, 1, 1 }; - size_t count = count_dis(gen); - std::uniform_int_distribution global_size_dis(1, static_cast(pow(count, 1.0 / dim))); - for (int d = 0; d < dim; d++) - { - std::uniform_int_distribution enqueued_local_size_dis(1, max_local_sizes[d]); - global_size[d] = global_size_dis(gen); - global_offset[d] = global_size_dis(gen); - enqueued_local_size[d] = enqueued_local_size_dis(gen); - } - // Local work size must not exceed CL_KERNEL_WORK_GROUP_SIZE for this kernel - while (enqueued_local_size[0] * enqueued_local_size[1] * enqueued_local_size[2] > max_work_group_size) - { - // otherwise decrease it until it fits - for (int d = 0; d < dim; d++) - { - enqueued_local_size[d] = (std::max)((size_t)1, enqueued_local_size[d] / 2); - } - } - if (options.uniform_work_group_size) - { - for (int d = 0; d < dim; d++) - { - global_size[d] = get_uniform_global_size(global_size[d], enqueued_local_size[d]); - } - } - count = global_size[0] * global_size[1] * global_size[2]; - - cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error); - RETURN_ON_CL_ERROR(error, 
"clCreateBuffer") - - const char pattern = 0; - error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") - - error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); - RETURN_ON_CL_ERROR(error, "clSetKernelArg") - - error = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset, global_size, enqueued_local_size, 0, NULL, NULL); - RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") - - std::vector output(count); - error = clEnqueueReadBuffer( - queue, output_buffer, CL_TRUE, - 0, sizeof(output_type) * count, - static_cast(output.data()), - 0, NULL, NULL - ); - RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") - - error = clReleaseMemObject(output_buffer); - RETURN_ON_CL_ERROR(error, "clReleaseMemObject") - - size_t sub_group_count_for_ndrange = 0; - size_t max_sub_group_size_for_ndrange = 0; - if (check_sub_groups_limits) - { - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, - sizeof(size_t) * dim, enqueued_local_size, - sizeof(size_t), &sub_group_count_for_ndrange, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - - error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sizeof(size_t) * dim, enqueued_local_size, - sizeof(size_t), &max_sub_group_size_for_ndrange, NULL); - RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") - } - - size_t num_groups[3]; - for (int d = 0; d < 3; d++) - num_groups[d] = static_cast(std::ceil(static_cast(global_size[d]) / enqueued_local_size[d])); - - size_t group_id[3]; - for (group_id[0] = 0; group_id[0] < num_groups[0]; group_id[0]++) - for (group_id[1] = 0; group_id[1] < num_groups[1]; group_id[1]++) - for (group_id[2] = 0; group_id[2] < num_groups[2]; group_id[2]++) - { - size_t local_size[3]; - for (int d = 0; d < 3; d++) - { - if (group_id[d] == num_groups[d] - 1) - local_size[d] = global_size[d] - 
group_id[d] * enqueued_local_size[d]; - else - local_size[d] = enqueued_local_size[d]; - } - - size_t local_id[3]; - for (local_id[0] = 0; local_id[0] < local_size[0]; local_id[0]++) - for (local_id[1] = 0; local_id[1] < local_size[1]; local_id[1]++) - for (local_id[2] = 0; local_id[2] < local_size[2]; local_id[2]++) - { - size_t global_id_wo_offset[3]; - size_t global_id[3]; - for (int d = 0; d < 3; d++) - { - global_id_wo_offset[d] = group_id[d] * enqueued_local_size[d] + local_id[d]; - global_id[d] = global_id_wo_offset[d] + global_offset[d]; - } - - // Ignore if the current work-item is outside of global work size (i.e. the work-group is non-uniform) - if (global_id_wo_offset[0] >= global_size[0] || - global_id_wo_offset[1] >= global_size[1] || - global_id_wo_offset[2] >= global_size[2]) break; - - const size_t global_linear_id = - global_id_wo_offset[2] * global_size[1] * global_size[0] + - global_id_wo_offset[1] * global_size[0] + - global_id_wo_offset[0]; - const size_t local_linear_id = - local_id[2] * local_size[1] * local_size[0] + - local_id[1] * local_size[0] + - local_id[0]; - - const output_type &o = output[global_linear_id]; - - CHECK_EQUAL(o.work_dim, dim, "get_work_dim") - for (int d = 0; d < 3; d++) - { - CHECK_EQUAL(o.global_size[d], global_size[d], "get_global_size") - CHECK_EQUAL(o.global_id[d], global_id[d], "get_global_id") - CHECK_EQUAL(o.local_size[d], local_size[d], "get_local_size") - CHECK_EQUAL(o.enqueued_local_size[d], enqueued_local_size[d], "get_enqueued_local_size") - CHECK_EQUAL(o.local_id[d], local_id[d], "get_local_id") - CHECK_EQUAL(o.num_groups[d], num_groups[d], "get_num_groups") - CHECK_EQUAL(o.group_id[d], group_id[d], "get_group_id") - CHECK_EQUAL(o.global_offset[d], global_offset[d], "get_global_offset") - } - - CHECK_EQUAL(o.global_linear_id, global_linear_id, "get_global_linear_id") - CHECK_EQUAL(o.local_linear_id, local_linear_id, "get_local_linear_id") - - // A few (but not all possible) sub-groups related checks - if 
(check_sub_groups) - { - if (check_sub_groups_limits) - { - CHECK_EQUAL(o.max_sub_group_size, max_sub_group_size_for_ndrange, "get_max_sub_group_size") - CHECK_EQUAL(o.enqueued_num_sub_groups, sub_group_count_for_ndrange, "get_enqueued_num_sub_groups") - } - CHECK(o.sub_group_size == 0 || o.sub_group_size > o.max_sub_group_size, "get_sub_group_size or get_max_sub_group_size") - CHECK(o.num_sub_groups == 0 || o.num_sub_groups > o.enqueued_num_sub_groups, "get_enqueued_num_sub_groups") - CHECK(o.sub_group_id >= o.num_sub_groups, "get_sub_group_id or get_num_sub_groups") - CHECK(o.sub_group_local_id >= o.sub_group_size, "get_sub_group_local_id or get_sub_group_size") - } - } - } - } - } - - clReleaseKernel(kernel); - clReleaseProgram(program); - return error; -} - -#undef CHECK_EQUAL -#undef CHECK - -AUTO_TEST_CASE(test_workitems_uniform) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.uniform_work_group_size = true; - options.max_count = num_elements; - options.num_tests = 1000; - return test_workitems(device, context, queue, options); -} - -AUTO_TEST_CASE(test_workitems_non_uniform) -(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - test_options options; - options.uniform_work_group_size = false; - options.max_count = num_elements; - options.num_tests = 1000; - return test_workitems(device, context, queue, options); -} - -} // namespace - -#endif // TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp index 7ab4454f5f..c25fd10fe7 100644 --- a/test_conformance/compiler/test_build_options.cpp +++ b/test_conformance/compiler/test_build_options.cpp @@ -43,11 +43,12 @@ const char *include_test_kernel[] = { "}\n" }; const char *options_test_kernel[] = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" size_t tid = 
get_global_id(0);\n" -" dst[tid] = src[tid];\n" -"}\n" }; + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " size_t tid = get_global_id(0);\n" + " dst[tid] = (int)src[tid];\n" + "}\n" +}; const char *optimization_options[] = { "-cl-single-precision-constant", @@ -60,10 +61,6 @@ const char *optimization_options[] = { "-cl-fast-relaxed-math", "-w", "-Werror", -#if defined( __APPLE__ ) - "-cl-opt-enable", - "-cl-auto-vectorize-enable" -#endif }; cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue ) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index a4a825126b..1519779a14 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -49,7 +49,8 @@ const char *known_extensions[] = { "cl_khr_subgroup_shuffle", "cl_khr_subgroup_shuffle_relative", "cl_khr_subgroup_clustered_reduce", - + "cl_khr_extended_bit_ops", + "cl_khr_integer_dot_product", // API-only extensions after this point. If you add above here, modify // first_API_extension below. 
"cl_khr_icd", @@ -71,10 +72,17 @@ const char *known_extensions[] = { "cl_khr_spirv_no_integer_wrap_decoration", "cl_khr_extended_versioning", "cl_khr_device_uuid", + "cl_khr_pci_bus_info", + "cl_khr_suggested_local_work_size", + "cl_khr_spirv_linkonce_odr", + "cl_khr_semaphore", + "cl_khr_external_semaphore", + "cl_khr_external_semaphore_sync_fd", + "cl_khr_command_buffer", }; -size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); -size_t first_API_extension = 27; +size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); +size_t first_API_extension = 29; const char *known_embedded_extensions[] = { "cles_khr_int64", @@ -335,10 +343,10 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context strcat(kernel_code, kernel_strings[4]); // Now we need to execute the kernel - cl_mem defines; + clMemWrapper defines; cl_int *data; - cl_program program; - cl_kernel kernel; + clProgramWrapper program; + clKernelWrapper kernel; Version version = get_device_cl_version(device); @@ -432,10 +440,6 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context free(extensions_supported[i]); } free(extensions); - if( defines ) { - error = clReleaseMemObject( defines ); - test_error( error, "Unable to release memory object" ); - } if (total_errors) return -1; diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt index 9bdc9e494c..06f0599c11 100644 --- a/test_conformance/computeinfo/CMakeLists.txt +++ b/test_conformance/computeinfo/CMakeLists.txt @@ -4,6 +4,8 @@ set(${MODULE_NAME}_SOURCES main.cpp device_uuid.cpp extended_versioning.cpp + conforming_version.cpp + pci_bus_info.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/computeinfo/conforming_version.cpp b/test_conformance/computeinfo/conforming_version.cpp new file mode 100644 index 0000000000..624cf85a22 --- /dev/null +++ b/test_conformance/computeinfo/conforming_version.cpp @@ -0,0 +1,37 
@@ + +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "harness/testHarness.h" +#include "harness/deviceInfo.h" + +int test_conformance_version(cl_device_id deviceID, cl_context context, + cl_command_queue ignoreQueue, int num_elements) +{ + std::string version_string{ get_device_info_string( + deviceID, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED) }; + + // Latest conformance version passed should match vYYYY-MM-DD-XX, where XX + // is a number + std::regex valid_format("^v\\d{4}-(((0)[1-9])|((1)[0-2]))-((0)[1-9]|[1-2][" + "0-9]|(3)[0-1])-\\d{2}$"); + test_assert_error( + std::regex_match(version_string, valid_format), + "CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED does not return " + "valid format vYYYY-MM-DD-XX"); + + return TEST_PASS; +} diff --git a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp index 1ef9dad2ed..7f29d0b683 100644 --- a/test_conformance/computeinfo/device_uuid.cpp +++ b/test_conformance/computeinfo/device_uuid.cpp @@ -105,7 +105,7 @@ int test_device_uuid(cl_device_id deviceID, cl_context context, if (!is_extension_available(deviceID, "cl_khr_device_uuid")) { log_info("cl_khr_device_uuid not supported. 
Skipping test...\n"); - return 0; + return TEST_SKIPPED_ITSELF; } int total_errors = 0; diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 47975f862d..d993655b9a 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -1421,11 +1421,16 @@ int test_computeinfo(cl_device_id deviceID, cl_context context, extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue, int); extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int); +extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue, + int); +extern int test_pci_bus_info(cl_device_id, cl_context, cl_command_queue, int); test_definition test_list[] = { ADD_TEST(computeinfo), ADD_TEST(extended_versioning), ADD_TEST(device_uuid), + ADD_TEST_VERSION(conformance_version, Version(3, 0)), + ADD_TEST(pci_bus_info), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/computeinfo/pci_bus_info.cpp b/test_conformance/computeinfo/pci_bus_info.cpp new file mode 100644 index 0000000000..cd62ca0513 --- /dev/null +++ b/test_conformance/computeinfo/pci_bus_info.cpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "harness/compat.h" + +#include +#include + +#include "harness/testHarness.h" +#include "harness/deviceInfo.h" + +int test_pci_bus_info(cl_device_id deviceID, cl_context context, + cl_command_queue ignoreQueue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_pci_bus_info")) + { + log_info("cl_khr_pci_bus_info not supported. Skipping test...\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + cl_device_pci_bus_info_khr info; + + size_t size_ret; + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, 0, NULL, + &size_ret); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR size"); + test_assert_error( + size_ret == sizeof(info), + "Query for CL_DEVICE_PCI_BUS_INFO_KHR returned an unexpected size"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(info), + &info, NULL); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR"); + + log_info("\tPCI Bus Info: %04x:%02x:%02x.%x\n", info.pci_domain, + info.pci_bus, info.pci_device, info.pci_function); + + return TEST_PASS; +} diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 44417262f0..329988414d 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -678,7 +678,8 @@ static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; } static void uint2float( void *out, void *in) { - cl_uint l = ((cl_uint*) in)[0]; + // Use volatile to prevent optimization by Clang compiler + volatile cl_uint l = ((cl_uint *)in)[0]; ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 } static void uint2double( void *out, void *in) @@ -791,7 +792,8 @@ static void ulong2double( void *out, void *in) #endif ((double*) out)[0] = (l == 0 ? 
0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); #else - cl_ulong l = ((cl_ulong*) in)[0]; + // Use volatile to prevent optimization by Clang compiler + volatile cl_ulong l = ((cl_ulong *)in)[0]; ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 #endif } diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp index 37707b83f0..e739b9ae3a 100644 --- a/test_conformance/conversions/fplib.cpp +++ b/test_conformance/conversions/fplib.cpp @@ -29,8 +29,8 @@ static uint32_t clz(uint64_t value) for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++) { - if(0x8000000000000000 & (value << num_zeros)) - break; + volatile uint64_t v = 0x8000000000000000ull & (value << num_zeros); + if (v) break; } return num_zeros; } diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 87b8ead741..d489e28a32 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -38,6 +38,7 @@ #include #endif +#include #include #include #include @@ -47,6 +48,8 @@ #endif #include +#include + #include "Sleep.h" #include "basic_test_conversions.h" @@ -1003,7 +1006,8 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod uint64_t i; gTestCount++; - size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] ); + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); size_t step = blockCount; uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]); cl_event writeInputBuffer = NULL; @@ -1078,7 +1082,7 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod fflush(stdout); } - cl_uint count = (uint32_t) MIN( blockCount, lastCase - i ); + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); writeInputBufferInfo.count = count; // Crate a user event to 
represent the status of the reference value computation completion @@ -1556,84 +1560,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, cl_program program; char testName[256]; int error = 0; - const char **strings; - size_t stringCount = 0; + + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; // Create the program. This is a bit complicated because we are trying to avoid byte and short stores. if (0 == vectorSize) { + // Create the type names. char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = src[i];\n" - "}\n" - }; - stringCount = sizeof(programSource) / sizeof(programSource[0]); - strings = programSource; - - if (outType == kdouble || inType == kdouble) - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - //create the type name strncpy(inName, gTypeNames[inType], sizeof(inName)); strncpy(outName, gTypeNames[outType], sizeof(outName)); sprintf(testName, "test_implicit_%s_%s", outName, inName); - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], gTypeNames[outType]); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); fflush(stdout); } else { int vectorSizetmp = vectorSizes[vectorSize]; + // Create the type names. 
char convertString[128]; char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = ", convertString, "( src[i] );\n" - "}\n" - }; - const char *programSourceV3[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0))\n" - " vstore3( ", convertString, "( vload3( i, src)), i, dest );\n" - " else\n" - " {\n" - " ", inName, "3 in;\n" - " ", outName, "3 out;\n" - " if( 0 == (i & 1) )\n" - " in.y = src[3*i+1];\n" - " in.x = src[3*i];\n" - " out = ", convertString, "( in ); \n" - " dest[3*i] = out.x;\n" - " if( 0 == (i & 1) )\n" - " dest[3*i+1] = out.y;\n" - " }\n" - "}\n" - }; - stringCount = 3 == vectorSizetmp ? sizeof(programSourceV3) / sizeof(programSourceV3[0]) : - sizeof(programSource) / sizeof(programSource[0]); - strings = 3 == vectorSizetmp ? 
programSourceV3 : programSource; - - if (outType == kdouble || inType == kdouble) { - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - programSourceV3[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - } - - //create the type name switch (vectorSizetmp) { case 1: @@ -1658,8 +1618,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, vlog("Building %s( %s ) test\n", convertString, inName); break; } - fflush(stdout); + + if (vectorSizetmp == 3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } *outKernel = NULL; @@ -1668,7 +1660,10 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, flags = "-cl-denorms-are-zero"; // build it - error = create_single_kernel_helper(gContext, &program, outKernel, (cl_uint)stringCount, strings, testName, flags); + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, 
outKernel, 1, + &programSource, testName, flags); if (error) { char buffer[2048] = ""; diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp index ffdfea5a52..93f2281d8b 100644 --- a/test_conformance/d3d10/harness.cpp +++ b/test_conformance/d3d10/harness.cpp @@ -367,41 +367,12 @@ cl_int HarnessD3D10_CreateKernelFromSource( const char *sourceTexts[] = {source}; size_t sourceLengths[] = {strlen(source) }; - status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]); + status = create_single_kernel_helper(context, &program, &kernel, 1, + &sourceTexts[0], entrypoint); TestRequire( CL_SUCCESS == status, "clCreateProgramWithSource failed"); } - status = clBuildProgram( - program, - 0, - NULL, - NULL, - NULL, - NULL); - if (CL_SUCCESS != status) - { - char log[2048] = {0}; - status = clGetProgramBuildInfo( - program, - device, - CL_PROGRAM_BUILD_LOG, - sizeof(log), - log, - NULL); - TestPrint("error: %s\n", log); - TestRequire( - CL_SUCCESS == status, - "Compilation error log:\n%s\n", log); - } - - kernel = clCreateKernel( - program, - entrypoint, - &status); - TestRequire( - CL_SUCCESS == status, - "clCreateKernel failed"); clReleaseProgram(program); *outKernel = kernel; diff --git a/test_conformance/d3d11/harness.cpp b/test_conformance/d3d11/harness.cpp index 687c6da27d..90ba200b1c 100644 --- a/test_conformance/d3d11/harness.cpp +++ b/test_conformance/d3d11/harness.cpp @@ -400,41 +400,10 @@ cl_int HarnessD3D11_CreateKernelFromSource( const char *sourceTexts[] = {source}; size_t sourceLengths[] = {strlen(source) }; - status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]); - TestRequire( - CL_SUCCESS == status, - "clCreateProgramWithSource failed"); + status = create_single_kernel_helper(context, &program, &kernel, 1, + &sourceTexts[0], entrypoint); + TestRequire(CL_SUCCESS == status, "Kernel creation failed"); } - status = clBuildProgram( - program, - 0, - NULL, - NULL, - 
NULL, - NULL); - if (CL_SUCCESS != status) - { - char log[2048] = {0}; - status = clGetProgramBuildInfo( - program, - device, - CL_PROGRAM_BUILD_LOG, - sizeof(log), - log, - NULL); - TestPrint("error: %s\n", log); - TestRequire( - CL_SUCCESS == status, - "Compilation error log:\n%s\n", log); - } - - kernel = clCreateKernel( - program, - entrypoint, - &status); - TestRequire( - CL_SUCCESS == status, - "clCreateKernel failed"); clReleaseProgram(program); *outKernel = kernel; diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index 84ac339f58..f228f06373 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -27,271 +28,316 @@ #ifdef CL_VERSION_2_0 extern int gWimpyMode; -static const char* helper_ndrange_1d_glo[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res 
!= CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_1d_glo[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_1d_loc[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " if (glob_size_arr[i] >= loc_size_arr[k])" - NL, " {" - NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const 
char *helper_ndrange_1d_loc[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " if (glob_size_arr[i] >= loc_size_arr[k])" NL, + " {" NL, + " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], " + "loc_size_arr[k]);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_1d_ofs[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int l = 0; l < n; l++)" - NL, " {" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " if (glob_size_arr[i] >= loc_size_arr[k])" - NL, " {" - NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), 
CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_1d_ofs[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[(get_global_offset(0) + " + "get_global_linear_id()) % len], 1u, memory_order_relaxed, " + "memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int l = 0; l < n; l++)" NL, + " {" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " if (glob_size_arr[i] >= loc_size_arr[k])" NL, + " {" NL, + " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], " + "loc_size_arr[k]);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_2d_glo[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 
1) % n] };" - NL, " ndrange_t ndrange = ndrange_2D(glob_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_2d_glo[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] " + "};" NL, + " ndrange_t ndrange = ndrange_2D(glob_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_2d_loc[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" - NL, " {" - NL, " size_t glob_size[] = { 
glob_size_arr[i], glob_size_arr[(i + 1) % n] };" - NL, " size_t loc_size[] = { 1, loc_size_arr[k] };" - NL, "" - NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_2d_loc[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL, + " {" NL, + " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % " + "n] };" NL, + " size_t loc_size[] = { 1, loc_size_arr[k] };" NL, + "" NL, + " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_2d_ofs[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* 
glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int l = 0; l < n; l++)" - NL, " {" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" - NL, " {" - NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};" - NL, " size_t loc_size[] = { 1, loc_size_arr[k] };" - NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" - NL, "" - NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_2d_ofs[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[(get_global_offset(1) * " + "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % " + "len], 1u, memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int l = 0; l < n; l++)" NL, + " {" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL, + " {" NL, + " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) " + "% n]};" NL, + " size_t loc_size[] = { 1, loc_size_arr[k] };" NL, + " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL, + "" NL, + 
" ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_3d_glo[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" - NL, " if (global_work_size <= (len * len))" - NL, " {" - NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };" - NL, " ndrange_t ndrange = ndrange_3D(glob_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_3d_glo[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, 
val); };" NL, + "" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % " + "n] * glob_size_arr[(i + 2) % n];" NL, + " if (global_work_size <= (len * len))" NL, + " {" NL, + " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % " + "n], glob_size_arr[(i + 2) % n] };" NL, + " ndrange_t ndrange = ndrange_3D(glob_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_3d_loc[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - NL, "" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" - NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))" - NL, " {" - NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };" - NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };" - NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " " - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_3d_loc[] = { + 
NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, " + "memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % " + "n] * glob_size_arr[(i + 2) % n];" NL, + " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && " + "global_work_size <= (len * len))" NL, + " {" NL, + " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % " + "n], glob_size_arr[(i + 2) % n] };" NL, + " size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL, + " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " " NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; -static const char* helper_ndrange_3d_ofs[] = -{ - NL, "void block_fn(int len, __global atomic_uint* val)" - NL, "{" - NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" - NL, "}" - NL, "" - NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" - 
NL, "" - NL, " for(int l = 0; l < n; l++)" - NL, " {" - NL, " for(int k = 0; k < n; k++)" - NL, " {" - NL, " for(int i = 0; i < n; i++)" - NL, " {" - NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" - NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))" - NL, " {" - NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};" - NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" - NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };" - NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" - NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, " }" - NL, " }" - NL, " }" - NL, " }" - NL, "}" - NL +static const char *helper_ndrange_3d_ofs[] = { + NL, + "void block_fn(int len, __global atomic_uint* val)" NL, + "{" NL, + " atomic_fetch_add_explicit(&val[(get_global_offset(2) * " + "get_global_size(0) * get_global_size(1) + get_global_offset(1) * " + "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % " + "len], 1u, memory_order_relaxed, memory_scope_device);" NL, + "}" NL, + "" NL, + "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, " + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " + "val, __global uint* ofs_arr)" NL, + "{" NL, + " size_t tid = get_global_id(0);" NL, + " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, + "" NL, + " for(int l = 0; l < n; l++)" NL, + " {" NL, + " for(int k = 0; k < n; k++)" NL, + " {" NL, + " for(int i = 0; i < n; i++)" NL, + " {" NL, + " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) " + "% n] * glob_size_arr[(i + 2) % n];" NL, + " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && " + "global_work_size <= (len * len))" NL, + " {" 
NL, + " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) " + "% n], glob_size_arr[(i + 2) % n]};" NL, + " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL, + " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l " + "+ 2) % n] };" NL, + " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL, + " int enq_res = enqueue_kernel(get_default_queue(), " + "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL, + " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL, + " }" NL, + " }" NL, + " }" NL, + " }" NL, + "}" NL }; static const kernel_src_dim_check sources_ndrange_Xd[] = @@ -600,7 +646,7 @@ int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_que max_local_size = (max_local_size > MAX_GWS)? MAX_GWS: max_local_size; if(gWimpyMode) { - max_local_size = MIN(8, max_local_size); + max_local_size = std::min((size_t)8, max_local_size); } cl_uint num = 10; diff --git a/test_conformance/device_execution/enqueue_profiling.cpp b/test_conformance/device_execution/enqueue_profiling.cpp index 8e5bab7642..b9e1a179fc 100644 --- a/test_conformance/device_execution/enqueue_profiling.cpp +++ b/test_conformance/device_execution/enqueue_profiling.cpp @@ -89,9 +89,9 @@ int test_enqueue_profiling(cl_device_id device, cl_context context, cl_event kernel_event; - err_ret = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &enqueue_multi_level, - "enqueue_multi_level", "-cl-std=CL2.0"); + err_ret = create_single_kernel_helper(context, &program, &kernel, 1, + &enqueue_multi_level, + "enqueue_multi_level"); if (check_error(err_ret, "Create single kernel failed")) return -1; res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp index e9a675c3f3..661d33deb9 100644 --- a/test_conformance/device_execution/host_multi_queue.cpp +++ 
b/test_conformance/device_execution/host_multi_queue.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -184,7 +184,11 @@ int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_qu global = 16; } - err_ret |= create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0"); + err_ret |= create_single_kernel_helper( + context, &program[i], &kernel[i], + sources_multi_queue_block[i].num_lines, + sources_multi_queue_block[i].lines, + sources_multi_queue_block[i].kernel_name); if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; } mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret); diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp index 5dce16042a..5376ea40f0 100644 --- a/test_conformance/device_execution/host_queue_order.cpp +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -124,7 +125,7 @@ int test_host_queue_order(cl_device_id device, cl_context context, cl_command_qu cl_uint num = arr_size(result); if( gWimpyMode ) { - num = MAX(num / 16, 4); + num = std::max(num / 16, 4U); } clMemWrapper res_mem; @@ -133,10 +134,14 @@ int test_host_queue_order(cl_device_id device, cl_context context, cl_command_qu cl_event kernel_event; - err_ret = create_single_kernel_helper_with_build_options(context, &program1, &kernel1, arr_size(enqueue_block_first_kernel), enqueue_block_first_kernel, "enqueue_block_first_kernel", "-cl-std=CL2.0"); + err_ret = create_single_kernel_helper( + context, &program1, &kernel1, arr_size(enqueue_block_first_kernel), + enqueue_block_first_kernel, "enqueue_block_first_kernel"); if(check_error(err_ret, "Create single kernel failed")) return -1; - err_ret = create_single_kernel_helper_with_build_options(context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), enqueue_block_second_kernel, "enqueue_block_second_kernel", "-cl-std=CL2.0"); + err_ret = create_single_kernel_helper( + context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), + enqueue_block_second_kernel, "enqueue_block_second_kernel"); if(check_error(err_ret, "Create single kernel failed")) return -1; res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret); diff --git a/test_conformance/device_execution/utils.cpp b/test_conformance/device_execution/utils.cpp index 66a2211f6b..05b6949172 100644 --- a/test_conformance/device_execution/utils.cpp +++ b/test_conformance/device_execution/utils.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -40,7 +40,8 @@ int run_n_kernel_args(cl_context context, cl_command_queue queue, const char** s cl_uint i; size_t ret_len; - err_ret = create_single_kernel_helper_with_build_options(context, &program, &kernel, num_lines, source, kernel_name, "-cl-std=CL2.0"); + err_ret = create_single_kernel_helper(context, &program, &kernel, num_lines, + source, kernel_name); if(check_error(err_ret, "Create single kernel failed")) return -1; mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, res_size, results, &err_ret); diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt new file mode 100644 index 0000000000..53d77ee55d --- /dev/null +++ b/test_conformance/extensions/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory( cl_ext_cxx_for_opencl ) +add_subdirectory( cl_khr_dx9_media_sharing ) diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt b/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt new file mode 100644 index 0000000000..fd397c31b6 --- /dev/null +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt @@ -0,0 +1,9 @@ +set(MODULE_NAME CL_EXT_CXX_FOR_OPENCL) + +set(${MODULE_NAME}_SOURCES + main.cpp + cxx_for_opencl_ext.cpp + cxx_for_opencl_ver.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp new file mode 100644 index 0000000000..1d5252cb24 --- /dev/null +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp @@ -0,0 +1,104 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" + + +int test_cxx_for_opencl(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel1; + clKernelWrapper kernel2; + clMemWrapper in_buffer; + clMemWrapper out_buffer; + cl_int value = 7; + + const char *kernel_sstr = + R"( + __global int x; + template + void execute(T &a, const T &b) { + a = b * 2; + } + __kernel void k1(__global int *p) { + execute(x, *p); + } + __kernel void k2(__global int *p) { + execute(*p, x); + })"; + + error = create_single_kernel_helper_with_build_options( + context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++"); + test_error(error, "Failed to create k1 kernel"); + + kernel2 = clCreateKernel(program, "k2", &error); + test_error(error, "Failed to create k2 kernel"); + + in_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(value), &value, &error); + test_error(error, "clCreateBuffer failed"); + + out_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(value), &value, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel1, 0, sizeof(in_buffer), &in_buffer); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel2, 0, sizeof(out_buffer), &out_buffer); + test_error(error, "clSetKernelArg failed"); + + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel1, 1, nullptr, &global_size, + nullptr, 0, nullptr, nullptr); + test_error(error, "clEnqueueNDRangeKernel 
failed"); + + error = clEnqueueNDRangeKernel(queue, kernel2, 1, nullptr, &global_size, + nullptr, 0, nullptr, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueReadBuffer(queue, out_buffer, CL_BLOCKING, 0, + sizeof(value), &value, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + if (value != 28) + { + log_error("ERROR: Kernel wrote %lu, expected 28\n", + static_cast(value)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_cxx_for_opencl_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int) +{ + if (!is_extension_available(device, "cl_ext_cxx_for_opencl")) + { + log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping " + "the test.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_cxx_for_opencl(device, context, queue); +} diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp new file mode 100644 index 0000000000..0376081462 --- /dev/null +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp @@ -0,0 +1,102 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" + + +int test_cxx_for_opencl_version(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + cl_int cxx4opencl_version; + cl_int cxx4opencl_expected_version; + clProgramWrapper program; + clKernelWrapper kernel; + cl_int error; + cl_int value = 0; + const char *kernel_sstr = + R"( + __kernel void k(__global int* buf) { + buf[0] = __OPENCL_CPP_VERSION__; + })"; + const size_t lengths[1] = { std::string{ kernel_sstr }.size() }; + + clProgramWrapper writer_program = + clCreateProgramWithSource(context, 1, &kernel_sstr, lengths, &error); + test_error(error, "Failed to create program with source"); + + error = clCompileProgram(writer_program, 1, &device, "-cl-std=CLC++", 0, + nullptr, nullptr, nullptr, nullptr); + test_error(error, "Failed to compile program"); + + cl_program progs[1] = { writer_program }; + program = clLinkProgram(context, 1, &device, "", 1, progs, 0, 0, &error); + test_error(error, "Failed to link program"); + + clMemWrapper out = + clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(cxx4opencl_version), &cxx4opencl_version, &error); + test_error(error, "clCreateBuffer failed"); + + kernel = clCreateKernel(program, "k", &error); + test_error(error, "Failed to create k kernel"); + + error = clSetKernelArg(kernel, 0, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &global_size, + nullptr, 0, nullptr, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueReadBuffer(queue, out, CL_BLOCKING, 0, + sizeof(cxx4opencl_version), &cxx4opencl_version, + 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + error = + clGetDeviceInfo(device, CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT, + sizeof(value), &value, nullptr); + test_error(error, "Failed to get device info"); + + 
cxx4opencl_expected_version = CL_VERSION_MAJOR_KHR(value) * 100 + + CL_VERSION_MINOR_KHR(value) * 10 + CL_VERSION_PATCH_KHR(value); + + if (cxx4opencl_version != cxx4opencl_expected_version) + { + log_error("ERROR: C++ for OpenCL version mismatch - returned %lu, " + "expected %lu\n", + static_cast(value), + static_cast(cxx4opencl_expected_version)); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_cxx_for_opencl_ver(cl_device_id device, cl_context context, + cl_command_queue queue, int) +{ + if (!is_extension_available(device, "cl_ext_cxx_for_opencl")) + { + log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping " + "the test.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_cxx_for_opencl_version(device, context, queue); +} diff --git a/test_conformance/clcpp/device_queue/main.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp similarity index 63% rename from test_conformance/clcpp/device_queue/main.cpp rename to test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp index 0467b19f00..5e8c14af76 100644 --- a/test_conformance/clcpp/device_queue/main.cpp +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,13 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "../common.hpp" -#include "test_enqueue.hpp" +#include "procs.h" +test_definition test_list[] = { + ADD_TEST_VERSION(cxx_for_opencl_ext, Version(2, 0)), + ADD_TEST_VERSION(cxx_for_opencl_ver, Version(2, 0)) +}; int main(int argc, const char *argv[]) { - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); + return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list, + false, 0, nullptr); } diff --git a/test_conformance/clcpp/atomics/main.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h similarity index 57% rename from test_conformance/clcpp/atomics/main.cpp rename to test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h index b9f964fa33..5665e012ff 100644 --- a/test_conformance/clcpp/atomics/main.cpp +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,13 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "../common.hpp" +#ifndef _procs_h +#define _procs_h -#include "atomic_fetch.hpp" +#include "harness/typeWrappers.h" +extern int test_cxx_for_opencl_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int); +extern int test_cxx_for_opencl_ver(cl_device_id device, cl_context context, + cl_command_queue queue, int); -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} +#endif /*_procs_h*/ diff --git a/test_extensions/media_sharing/CMakeLists.txt b/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt similarity index 92% rename from test_extensions/media_sharing/CMakeLists.txt rename to test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt index 9fdde1c7e6..1ec2a33895 100644 --- a/test_extensions/media_sharing/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt @@ -21,5 +21,5 @@ set_source_files_properties( include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -include(../../test_conformance/CMakeCommon.txt) +include(../../CMakeCommon.txt) endif(WIN32) diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp new file mode 100644 index 0000000000..8b70917316 --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp @@ -0,0 +1,231 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include + +#include "harness/testHarness.h" +#include "utils.h" +#include "procs.h" + + +test_definition test_list[] = { ADD_TEST(context_create), + ADD_TEST(get_device_ids), + ADD_TEST(api), + ADD_TEST(kernel), + ADD_TEST(other_data_types), + ADD_TEST(memory_access), + ADD_TEST(interop_user_sync) }; + +const int test_num = ARRAY_SIZE(test_list); + +clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR = + NULL; +clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL; +clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = + NULL; +clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = + NULL; + +cl_platform_id gPlatformIDdetected; +cl_device_id gDeviceIDdetected; +cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + +bool MediaSurfaceSharingExtensionInit() +{ + clGetDeviceIDsFromDX9MediaAdapterKHR = + (clGetDeviceIDsFromDX9MediaAdapterKHR_fn) + clGetExtensionFunctionAddressForPlatform( + gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR"); + if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(" + "clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n"); + return false; + } + + clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn) + clGetExtensionFunctionAddressForPlatform( + gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR"); + if (clCreateFromDX9MediaSurfaceKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(" + "clCreateFromDX9MediaSurfaceKHR) returned NULL.\n"); + return false; + } + + clEnqueueAcquireDX9MediaSurfacesKHR = + (clEnqueueAcquireDX9MediaSurfacesKHR_fn) + clGetExtensionFunctionAddressForPlatform( + gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR"); + if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL) + { + 
log_error("clGetExtensionFunctionAddressForPlatform(" + "clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n"); + return false; + } + + clEnqueueReleaseDX9MediaSurfacesKHR = + (clEnqueueReleaseDX9MediaSurfacesKHR_fn) + clGetExtensionFunctionAddressForPlatform( + gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR"); + if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(" + "clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n"); + return false; + } + + return true; +} + +bool DetectPlatformAndDevice() +{ + std::vector platforms; + cl_uint platformsNum = 0; + cl_int error = clGetPlatformIDs(0, 0, &platformsNum); + if (error != CL_SUCCESS) + { + print_error(error, "clGetPlatformIDs failed\n"); + return false; + } + + platforms.resize(platformsNum); + error = clGetPlatformIDs(platformsNum, &platforms[0], 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetPlatformIDs failed\n"); + return false; + } + + bool found = false; + for (size_t i = 0; i < platformsNum; ++i) + { + std::vector devices; + cl_uint devicesNum = 0; + error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0, + &devicesNum); + if (error != CL_SUCCESS) + { + print_error(error, "clGetDeviceIDs failed\n"); + return false; + } + + devices.resize(devicesNum); + error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum, + &devices[0], 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetDeviceIDs failed\n"); + return false; + } + + for (size_t j = 0; j < devicesNum; ++j) + { + if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing")) + { + gPlatformIDdetected = platforms[i]; + gDeviceIDdetected = devices[j]; + found = true; + break; + } + } + } + + if (!found) + { + log_info("Test was not run, because the media surface sharing " + "extension is not supported for any devices.\n"); + return false; + } + + return true; +} + +bool CmdlineParse(int argc, const char *argv[]) +{ + char *env_mode = 
getenv("CL_DEVICE_TYPE"); + if (env_mode != NULL) + { + if (strcmp(env_mode, "gpu") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; + else if (strcmp(env_mode, "cpu") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; + else if (strcmp(env_mode, "accelerator") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; + else if (strcmp(env_mode, "default") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error("Unknown CL_DEVICE_TYPE env variable setting: " + "%s.\nAborting...\n", + env_mode); + return false; + } + } + + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "gpu") == 0 + || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; + continue; + } + else if (strcmp(argv[i], "cpu") == 0 + || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; + continue; + } + else if (strcmp(argv[i], "accelerator") == 0 + || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; + continue; + } + else if (strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + continue; + } + else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) + { + CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW); + } + } + + return true; +} + +int main(int argc, const char *argv[]) +{ + if (!CmdlineParse(argc, argv)) return TEST_FAIL; + + if (!DetectPlatformAndDevice()) + { + log_info("Test was not run, because the media surface sharing " + "extension is not supported\n"); + return TEST_SKIP; + } + + if (!MediaSurfaceSharingExtensionInit()) return TEST_FAIL; + + return runTestHarness(argc, argv, test_num, test_list, true, 0); +} diff --git 
a/test_extensions/media_sharing/procs.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h similarity index 61% rename from test_extensions/media_sharing/procs.h rename to test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h index 6b577990de..e7fd785e90 100644 --- a/test_extensions/media_sharing/procs.h +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h @@ -19,13 +19,20 @@ #define __MEDIA_SHARING_PROCS_H__ -extern int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_context_create(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_get_device_ids(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_api(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_kernel(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_other_data_types(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_memory_access(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int 
test_interop_user_sync(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); -#endif // #ifndef __MEDIA_SHARING_PROCS_H__ \ No newline at end of file +#endif // #ifndef __MEDIA_SHARING_PROCS_H__ \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp new file mode 100644 index 0000000000..6033ce9bdb --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp @@ -0,0 +1,373 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int context_create(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, unsigned int width, + unsigned int height, TContextFuncType functionCreate, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + CResult result; + + // create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + // generate input data + std::vector bufferIn(width * height * 3 / 2, 0); + if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + return result.Result(); + } + + if (surfaceFormat != SURFACE_FORMAT_NV12 + && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info( + "Skipping test case, image format is not supported by a device " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + if (!MediaSurfaceCreate( + adapterType, width, height, surfaceFormat, *deviceWrapper, + surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true : false, + &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + clContextWrapper ctx; + switch (functionCreate) + { + case CONTEXT_CREATE_DEFAULT: + ctx = clCreateContext(&contextProperties[0], 1, + &gDeviceIDdetected, NULL, NULL, &error); + break; + case CONTEXT_CREATE_FROM_TYPE: + ctx = clCreateContextFromType(&contextProperties[0], + gDeviceTypeSelected, NULL, NULL, + &error); + break; + default: + log_error("Unknown context creation function enum\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + break; + } + + if (error != CL_SUCCESS) + { + std::string functionName; + FunctionContextCreateToString(functionCreate, functionName); + log_error("%s failed: %s\n", functionName.c_str(), + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error( + "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", + 
planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, + objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + cl_event event; + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + cl_uint eventType = 0; + error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), + &eventType, NULL); + if (error != CL_SUCCESS) + { + log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (eventType != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) + { + log_error( + "Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clReleaseEvent(event); + + size_t origin[3] = { 0, 0, 0 }; + size_t offset = 0; + size_t frameSize = width * height * 3 / 2; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = + clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, + regionPlane, 0, 0, &out.at(offset), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) + { + log_error("OCL object verification failed - clEnqueueReadImage\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + eventType = 0; + error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), + &eventType, NULL); + if (error != CL_SUCCESS) + { + log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (eventType != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR) + { + log_error( + "Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clReleaseEvent(event); + + // object verification + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", 
adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} + +int test_context_create(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + const unsigned int WIDTH = 256; + const unsigned int HEIGHT = 256; + + std::vector adapterTypes; +#if defined(_WIN32) + adapterTypes.push_back(CL_ADAPTER_D3D9_KHR); + adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR); + adapterTypes.push_back(CL_ADAPTER_DXVA_KHR); +#endif + + std::vector contextFuncs; + contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); + contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); + + std::vector formats; + formats.push_back(SURFACE_FORMAT_NV12); + formats.push_back(SURFACE_FORMAT_YV12); + + std::vector sharedHandleTypes; + sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); +#if defined(_WIN32) + sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); +#endif + + CResult result; + for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size(); + ++adapterTypeIdx) + { + // iteration through all create context functions + for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); + ++contextFuncIdx) + { + // iteration through surface formats + for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) + { + // shared handle enabled or disabled + for (size_t sharedHandleIdx = 0; + sharedHandleIdx < sharedHandleTypes.size(); + ++sharedHandleIdx) + { + if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR + && sharedHandleTypes[sharedHandleIdx] + == SHARED_HANDLE_ENABLED) + continue; + + if (context_create( + deviceID, context, queue, num_elements, WIDTH, + HEIGHT, contextFuncs[contextFuncIdx], + adapterTypes[adapterTypeIdx], formats[formatIdx], + sharedHandleTypes[sharedHandleIdx]) + != 0) + { + std::string sharedHandle = + (sharedHandleTypes[sharedHandleIdx] + == 
SHARED_HANDLE_ENABLED) + ? "shared handle" + : "no shared handle"; + std::string formatStr; + std::string adapterTypeStr; + SurfaceFormatToString(formats[formatIdx], formatStr); + AdapterToString(adapterTypes[adapterTypeIdx], + adapterTypeStr); + + log_error("\nTest case - clCreateContext (%s, %s, %s) " + "failed\n\n", + adapterTypeStr.c_str(), formatStr.c_str(), + sharedHandle.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + } + } + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp new file mode 100644 index 0000000000..ab92cb890a --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp @@ -0,0 +1,781 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int api_functions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, + unsigned int height, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const cl_uchar MAX_VALUE = 255 / 2; + CResult result; + + // create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + // generate input and expected data + std::vector> bufferRef1(FRAME_NUM); + std::vector> bufferRef2(FRAME_NUM); + std::vector> bufferRef3(FRAME_NUM); + size_t frameSize = width * height * 3 / 2; + cl_uchar step = MAX_VALUE / FRAME_NUM; + for (size_t i = 0; i < FRAME_NUM; ++i) + { + if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height, + static_cast(step * i), + static_cast(step * (i + 1))) + || !YUVGenerate(surfaceFormat, bufferRef2[i], width, height, + static_cast(step * i), + static_cast(step * (i + 1)), 0.2) + || !YUVGenerate(surfaceFormat, bufferRef3[i], width, height, + static_cast(step * i), + static_cast(step * (i + 1)), 0.4)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + } + + // iterates through all devices + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + return result.Result(); + } + + if (surfaceFormat != SURFACE_FORMAT_NV12 + && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
"yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info( + "Skipping test case, image format is not supported by a device " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + + // create surface + if (!MediaSurfaceCreate( + adapterType, width, height, surfaceFormat, *deviceWrapper, + surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false, + &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + clContextWrapper ctx = clCreateContext( + &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error( + "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", + 
planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, + objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + if (!YUVSurfaceSet(surfaceFormat, surface, + bufferRef1[frameIdx % FRAME_NUM], width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { // read operation + std::vector out(frameSize, 0); + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef1[frameIdx % FRAME_NUM], width, + height)) + { + log_error("Frame idx: %i, OCL image is different then " + "shared OCL object: clEnqueueReadImage\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // write operation + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueWriteImage( + cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, + 0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0, + 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + } + + { // read operation + std::vector out(frameSize, 0); + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef2[frameIdx % FRAME_NUM], width, + height)) + { + log_error("Frame idx: %i, Shared OCL image verification " + "after clEnqueueWriteImage failed\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // copy operation (shared OCL to OCL) + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = + (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? 
CL_RG + : CL_R; + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = planeWidth; + imageDesc.image_height = planeHeight; + + clMemWrapper planeOCL = + clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane, + &imageDesc, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueCopyImage(cmdQueue, memObjList[i], + planeOCL, origin, origin, + regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef2[frameIdx % FRAME_NUM], width, + height)) + { + log_error( + "Frame idx: %i, OCL image verification after " + "clEnqueueCopyImage (from shared OCL to OCL) failed\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // copy operation (OCL to shared OCL) + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + size_t pitchSize = + ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) + ? width + : planeWidth) + * sizeof(cl_uchar); + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = + (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? 
CL_RG + : CL_R; + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = planeWidth; + imageDesc.image_height = planeHeight; + imageDesc.image_row_pitch = pitchSize; + + clMemWrapper planeOCL = clCreateImage( + ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc, + &bufferRef1[frameIdx % FRAME_NUM][offset], &error); + if (error != CL_SUCCESS) + { + log_error("clCreateImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueCopyImage(cmdQueue, planeOCL, + memObjList[i], origin, origin, + regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef1[frameIdx % FRAME_NUM], width, + height)) + { + log_error( + "Frame idx: %i, OCL image verification after " + "clEnqueueCopyImage (from OCL to shared OCL) failed\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // copy from image to buffer + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + size_t bufferSize = sizeof(cl_uchar) * frameSize; + clMemWrapper buffer = clCreateBuffer(ctx, CL_MEM_READ_WRITE, + bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueCopyImageToBuffer( + cmdQueue, memObjList[i], buffer, origin, regionPlane, + offset, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImageToBuffer failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight * sizeof(cl_uchar); + } + + std::vector out(frameSize, 0); + error = clEnqueueReadBuffer(cmdQueue, buffer, CL_TRUE, 0, + bufferSize, &out[0], 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef1[frameIdx % FRAME_NUM], width, + height)) + { + log_error("Frame idx: %i, OCL buffer verification after " + "clEnqueueCopyImageToBuffer (from shared OCL " + "image to OCL buffer) failed\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // copy buffer to image + size_t bufferSize = sizeof(cl_uchar) * frameSize; + clMemWrapper buffer = clCreateBuffer( + ctx, CL_MEM_COPY_HOST_PTR, bufferSize, + &bufferRef2[frameIdx % FRAME_NUM][0], &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueCopyBufferToImage( + cmdQueue, buffer, memObjList[i], offset, origin, + regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyBufferToImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight * sizeof(cl_uchar); + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef2[frameIdx % FRAME_NUM], width, + height)) + { + log_error("Frame idx: %i, OCL image verification after " + "clEnqueueCopyBufferToImage (from OCL buffer to " + "shared OCL image) failed\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // map operation to read + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + size_t pitchSize = + ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) + ? 
width + : planeWidth); + + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage( + cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin, + regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < planeHeight; ++y) + memcpy(&out[offset + y * pitchSize], + static_cast(mapPtr) + + y * rowPitch / sizeof(cl_uchar), + pitchSize * sizeof(cl_uchar)); + + error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], + mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += pitchSize * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, + bufferRef2[frameIdx % FRAME_NUM], width, + height)) + { + log_error("Frame idx: %i, Mapped shared OCL image is " + "different then expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // map operation to write + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + size_t pitchSize = + ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) + ? 
width + : planeWidth); + + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage( + cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin, + regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < planeHeight; ++y) + memcpy(static_cast(mapPtr) + + y * rowPitch / sizeof(cl_uchar), + &bufferRef3[frameIdx % FRAME_NUM] + [offset + y * pitchSize], + pitchSize * sizeof(cl_uchar)); + + error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], + mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += pitchSize * planeHeight; + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, + height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, + bufferRef3[frameIdx % FRAME_NUM], width, height)) + { + log_error( + "Frame idx: %i, media surface is different than expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + 
} + } + + return result.Result(); +} + +int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) +{ + CResult result; + +#if defined(_WIN32) + // D3D9 + if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 3, 512, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // D3D9EX + if (api_functions(deviceID, context, queue, num_elements, 5, 256, 512, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 7, 512, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 15, 128, 128, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // DXVA + if (api_functions(deviceID, context, queue, num_elements, 20, 128, 128, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + 
!= 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 40, 64, 64, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 5, 512, 512, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp new file mode 100644 index 0000000000..a204440dd6 --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp @@ -0,0 +1,541 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include + +#include "harness/errorHelpers.h" +#include "harness/kernelHelpers.h" + +#include "utils.h" + +int kernel_functions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, + unsigned int height, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, + TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const cl_uchar MAX_VALUE = 255 / 2; + const std::string PROGRAM_STR = + "__kernel void TestFunction( read_only image2d_t planeIn, write_only " + "image2d_t planeOut, " NL " sampler_t " + "sampler, __global int *planeRes)" NL "{" NL + " int w = get_global_id(0);" NL " int h = get_global_id(1);" NL + " int width = get_image_width(planeIn);" NL + " int height = get_image_height(planeOut);" NL + " float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + " + "0.2f;" NL " float4 color1 = read_imagef(planeIn, sampler, " + "(float2)(w,h)) + 0.2f;" NL + " color0 = (color0 == color1) ? 
color0: (float4)(0.5, 0.5, 0.5, " + "0.5);" NL " write_imagef(planeOut, (int2)(w,h), color0);" NL + " if(w == 0 && h == 0)" NL " {" NL " planeRes[0] = width;" NL + " planeRes[1] = height;" NL " }" NL "}"; + + CResult result; + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + std::vector> bufferIn(FRAME_NUM); + std::vector> bufferExp(FRAME_NUM); + size_t frameSize = width * height * 3 / 2; + cl_uchar step = MAX_VALUE / FRAME_NUM; + for (size_t i = 0; i < FRAME_NUM; ++i) + { + if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height, + static_cast(step * i), + static_cast(step * (i + 1))) + || !YUVGenerate(surfaceFormat, bufferExp[i], width, height, + static_cast(step * i), + static_cast(step * (i + 1)), 0.2)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + } + + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + return result.Result(); + } + + if (surfaceFormat != SURFACE_FORMAT_NV12 + && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info( + "Skipping test case, image format is not supported by a device " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSrcHandle = 0; + std::auto_ptr surfaceSrc; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, + *deviceWrapper, surfaceSrc, + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true + : false, + &objectSrcHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + void *objectDstHandle = 0; + std::auto_ptr surfaceDst; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, + *deviceWrapper, surfaceDst, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true + : false, + &objectDstHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + clContextWrapper ctx = clCreateContext( + &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfoSrc; + surfaceInfoSrc.resource = + *(static_cast(surfaceSrc.get())); + surfaceInfoSrc.shared_handle = objectSrcHandle; + + cl_dx9_surface_info_khr surfaceInfoDst; + surfaceInfoDst.resource = + *(static_cast(surfaceDst.get())); + surfaceInfoDst.shared_handle = objectDstHandle; +#else + void *surfaceInfoSrc = 0; + void *surfaceInfoDst = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjSrcList; + std::vector memObjDstList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planeSrcList(planesNum); + std::vector planeDstList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error( 
+ "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjSrcList.push_back(planeSrcList[planeIdx]); + + planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error( + "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjDstList.push_back(planeDstList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjSrcList, width, height, + surfaceSrc, objectSrcHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + if (!YUVSurfaceSet(surfaceFormat, surfaceSrc, + bufferIn[frameIdx % FRAME_NUM], width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjSrcList.size()), + &memObjSrcList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjDstList.size()), + &memObjDstList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + 
clSamplerWrapper sampler = clCreateSampler( + ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create sampler\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clProgramWrapper program; + clKernelWrapper kernel; + const char *progPtr = PROGRAM_STR.c_str(); + if (create_single_kernel_helper(ctx, &program, &kernel, 1, + (const char **)&progPtr, + "TestFunction")) + result.ResultSub(CResult::TEST_FAIL); + + size_t bufferSize = sizeof(cl_int) * 2; + clMemWrapper imageRes = clCreateBuffer(ctx, CL_MEM_READ_WRITE, + bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjSrcList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + size_t threads[2] = { planeWidth, planeHeight }; + + error = clSetKernelArg(kernel, 0, sizeof(memObjSrcList[i]), + &memObjSrcList[i]); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg(kernel, 1, sizeof(memObjDstList[i]), + &memObjDstList[i]); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t localThreads[2]; + error = get_max_common_2D_work_group_size(ctx, kernel, 
threads, + localThreads); + if (error != CL_SUCCESS) + { + log_error("Unable to get work group size to use"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = + clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads, + localThreads, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("Unable to execute test kernel"); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector imageResOut(2, 0); + error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0, + bufferSize, &imageResOut[0], 0, + NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (imageResOut[0] != planeWidth) + { + log_error("Invalid width value, test = %i, expected = %i\n", + imageResOut[0], planeWidth); + result.ResultSub(CResult::TEST_FAIL); + } + + if (imageResOut[1] != planeHeight) + { + log_error( + "Invalid height value, test = %i, expected = %i\n", + imageResOut[1], planeHeight); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM], + width, height)) + { + log_error( + "Frame idx: %i, OCL objects are different than expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjSrcList.size()), + &memObjSrcList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjDstList.size()), + &memObjDstList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { 
+ log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width, + height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, + bufferExp[frameIdx % FRAME_NUM], width, height)) + { + log_error( + "Frame idx: %i, media surface is different than expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} + +int test_kernel(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + // D3D9 + if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // D3D9EX + if (kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + 
result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // DXVA + if (kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return 
TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp new file mode 100644 index 0000000000..613a602c69 --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp @@ -0,0 +1,220 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int get_device_ids(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + cl_dx9_media_adapter_type_khr adapterType) +{ + CResult result; + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_uint devicesExpectedNum = 0; + cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0, + &devicesExpectedNum); + if (error != CL_SUCCESS || devicesExpectedNum < 1) + { + log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesExpected(devicesExpectedNum); + error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, + devicesExpectedNum, &devicesExpected[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + std::vector mediaAdapterTypes; + mediaAdapterTypes.push_back(adapterType); + + std::vector mediaDevices; + mediaDevices.push_back(deviceWrapper->Device()); + + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result))) + { + return result.Result(); + } + + cl_uint devicesAllNum = 0; + error = clGetDeviceIDsFromDX9MediaAdapterKHR( + gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesAll; + if (devicesAllNum > 0) + { + devicesAll.resize(devicesAllNum); + error = 
clGetDeviceIDsFromDX9MediaAdapterKHR( + gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum, + &devicesAll[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + cl_uint devicesPreferredNum = 0; + error = clGetDeviceIDsFromDX9MediaAdapterKHR( + gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, + &devicesPreferredNum); + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesPreferred; + if (devicesPreferredNum > 0) + { + devicesPreferred.resize(devicesPreferredNum); + error = clGetDeviceIDsFromDX9MediaAdapterKHR( + gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, + devicesPreferredNum, &devicesPreferred[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + if (devicesAllNum < devicesPreferredNum) + { + log_error("Invalid number of preferred devices. 
It should be a " + "subset of all devices\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (cl_uint i = 0; i < devicesPreferredNum; ++i) + { + cl_uint j = 0; + for (; j < devicesAllNum; ++j) + { + if (devicesPreferred[i] == devicesAll[j]) break; + } + + if (j == devicesAllNum) + { + log_error("Preferred device is not a subset of all devices\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + for (cl_uint i = 0; i < devicesAllNum; ++i) + { + cl_uint j = 0; + for (; j < devicesExpectedNum; ++j) + { + if (devicesAll[i] == devicesExpected[j]) break; + } + + if (j == devicesExpectedNum) + { + log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a " + "subset of all devices for selected platform\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} + +int test_get_device_ids(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + if (get_device_ids(deviceID, context, queue, num_elements, + CL_ADAPTER_D3D9_KHR) + != 0) + { + log_error("\nTest case (D3D9) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (get_device_ids(deviceID, context, queue, num_elements, + CL_ADAPTER_D3D9EX_KHR) + != 0) + { + log_error("\nTest case (D3D9EX) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (get_device_ids(deviceID, context, queue, num_elements, + CL_ADAPTER_DXVA_KHR) + != 0) + { + log_error("\nTest case (DXVA) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; 
+#endif + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp new file mode 100644 index 0000000000..fbc616e2bf --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp @@ -0,0 +1,419 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "utils.h" + +int interop_user_sync(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + unsigned int width, unsigned int height, + TContextFuncType functionCreate, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, + TSharedHandleType sharedHandle, cl_bool userSync) +{ + CResult result; + + // create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + // generate input data + std::vector bufferIn(width * height * 3 / 2, 0); + if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + 
return result.Result(); + } + + if (surfaceFormat != SURFACE_FORMAT_NV12 + && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no"; + std::string syncStr = (userSync == CL_TRUE) ? "yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a " + "device (adapter type: %s, format: %s, shared handle: %s, " + "user sync: %s)\n", + adapterStr.c_str(), formatStr.c_str(), + sharedHandleStr.c_str(), syncStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + if (!MediaSurfaceCreate( + adapterType, width, height, surfaceFormat, *deviceWrapper, + surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false, + &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + CL_CONTEXT_INTEROP_USER_SYNC, + userSync, + 0, + }; + + + clContextWrapper ctx; + switch (functionCreate) + { + case CONTEXT_CREATE_DEFAULT: + ctx = clCreateContext(&contextProperties[0], 1, + &gDeviceIDdetected, NULL, NULL, &error); + break; + case CONTEXT_CREATE_FROM_TYPE: + ctx = clCreateContextFromType(&contextProperties[0], + gDeviceTypeSelected, NULL, NULL, + &error); + break; + default: + log_error("Unknown context creation function enum\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + break; + } + + if (error != CL_SUCCESS) + { + std::string functionName; + FunctionContextCreateToString(functionCreate, functionName); + 
log_error("%s failed: %s\n", functionName.c_str(), + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error( + "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, + objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (userSync == CL_TRUE) + { +#if defined(_WIN32) + IDirect3DQuery9 *eventQuery = NULL; + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: { + LPDIRECT3DDEVICE9 device = + (LPDIRECT3DDEVICE9)deviceWrapper->Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE + == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + 
} + break; + case CL_ADAPTER_D3D9EX_KHR: { + LPDIRECT3DDEVICE9EX device = + (LPDIRECT3DDEVICE9EX)deviceWrapper->Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE + == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + } + break; + case CL_ADAPTER_DXVA_KHR: { + CDXVAWrapper *DXVADevice = + dynamic_cast(&(*deviceWrapper)); + LPDIRECT3DDEVICE9EX device = + (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE + == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + } + break; + default: + log_error("Unknown adapter type\n"); + return false; + break; + } + if (eventQuery) + { + eventQuery->Release(); + } +#else + return TEST_NOT_IMPLEMENTED; +#endif + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t origin[3] = { 0, 0, 0 }; + size_t offset = 0; + size_t frameSize = width * height * 3 / 2; + std::vector out(frameSize, 0); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = + clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, + regionPlane, 0, 0, &out.at(offset), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) + { + log_error("OCL object verification failed - clEnqueueReadImage\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (userSync == CL_TRUE) + { + error = clFinish(cmdQueue); + if (error != CL_SUCCESS) + { + log_error("clFinish failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + // shared object verification + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} + +int test_interop_user_sync(cl_device_id deviceID, cl_context 
context, + cl_command_queue queue, int num_elements) +{ + const unsigned int WIDTH = 256; + const unsigned int HEIGHT = 256; + + std::vector adapters; +#if defined(_WIN32) + adapters.push_back(CL_ADAPTER_D3D9_KHR); + adapters.push_back(CL_ADAPTER_D3D9EX_KHR); + adapters.push_back(CL_ADAPTER_DXVA_KHR); +#else + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector contextFuncs; + contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); + contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); + + std::vector formats; + formats.push_back(SURFACE_FORMAT_NV12); + formats.push_back(SURFACE_FORMAT_YV12); + + std::vector sharedHandleTypes; + sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); + sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); + + std::vector sync; + sync.push_back(CL_FALSE); + sync.push_back(CL_TRUE); + + CResult result; + for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx) + { + // iteration through all create context functions + for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); + ++contextFuncIdx) + { + // iteration through YUV formats + for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) + { + // shared handle enabled or disabled + for (size_t sharedHandleIdx = 0; + sharedHandleIdx < sharedHandleTypes.size(); + ++sharedHandleIdx) + { + // user sync interop disabled or enabled + for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx) + { + if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR + && sharedHandleTypes[sharedHandleIdx] + == SHARED_HANDLE_ENABLED) + continue; + + if (interop_user_sync( + deviceID, context, queue, num_elements, WIDTH, + HEIGHT, contextFuncs[contextFuncIdx], + adapters[adapterIdx], formats[formatIdx], + sharedHandleTypes[sharedHandleIdx], + sync[syncIdx]) + != 0) + { + std::string syncStr = (sync[syncIdx] == CL_TRUE) + ? "user sync enabled" + : "user sync disabled"; + std::string sharedHandle = + (sharedHandleTypes[sharedHandleIdx] + == SHARED_HANDLE_ENABLED) + ? 
"shared handle" + : "no shared handle"; + std::string adapterStr; + std::string formatStr; + SurfaceFormatToString(formats[formatIdx], + formatStr); + AdapterToString(adapters[adapterIdx], adapterStr); + + log_error("\nTest case - clCreateContext (%s, %s, " + "%s, %s) failed\n\n", + adapterStr.c_str(), formatStr.c_str(), + sharedHandle.c_str(), syncStr.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + } + } + } + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp new file mode 100644 index 0000000000..1e4e2c4ebc --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp @@ -0,0 +1,549 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int memory_access(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, unsigned int width, + unsigned int height, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + CResult result; + + std::auto_ptr deviceWrapper; + // creates device + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + // generate input and expected data + size_t frameSize = width * height * 3 / 2; + std::vector bufferRef0(frameSize, 0); + std::vector bufferRef1(frameSize, 0); + std::vector bufferRef2(frameSize, 0); + if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90) + || !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180) + || !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + // iterates through all devices + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + return result.Result(); + } + + if (surfaceFormat != SURFACE_FORMAT_NV12 + && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
"yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info( + "Skipping test case, image format is not supported by a device " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + + // creates surface + if (!MediaSurfaceCreate( + adapterType, width, height, surfaceFormat, *deviceWrapper, + surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false, + &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + clContextWrapper ctx = clCreateContext( + &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { // memory access write +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + 
return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for " + "WRITE_ONLY plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &bufferRef1[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut0(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + { // memory access read +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for " + "READ_ONLY plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, 
static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector out(frameSize, 0); + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) + { + log_error("OCL image (READ_ONLY) is different then expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut1(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + { // memory access read write +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = + *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + 
std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, + &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for " + "READ_WRITE plane %i: %s\n", + planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { // read + std::vector out(frameSize, 0); + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, + origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) + { + log_error( + "OCL image (READ_WRITE) is different then expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // write + size_t offset = 0; + size_t origin[3] = { 0, 0, 0 }; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width : width / 2; + size_t planeHeight = (i == 0) ? 
height : height / 2; + size_t regionPlane[3] = { planeWidth, planeHeight, 1 }; + + error = clEnqueueWriteImage( + cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, + 0, 0, &bufferRef2[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut2(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} +/* Test entry point for the memory-access cases. On _WIN32 builds it forwards deviceID, context, queue and num_elements unchanged to memory_access once per listed combination of surface size, adapter type (D3D9, D3D9EX, DXVA), surface format (NV12, YV12) and shared-handle mode; the D3D9 cases are run only with SHARED_HANDLE_DISABLED. A case that returns non-zero is logged and recorded as TEST_FAIL in the CResult, and the remaining cases still run. The return value is result.Result(). On other platforms the function returns TEST_NOT_IMPLEMENTED. */ +int test_memory_access(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + // D3D9 + if (memory_access(deviceID, context, queue, num_elements, 256, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID,
context, queue, num_elements, 512, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // D3D9EX + if (memory_access(deviceID, context, queue, num_elements, 256, 512, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 512, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 256, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 128, 128, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // DXVA + if (memory_access(deviceID, context, queue, num_elements, 128, 128, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 64, 64, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 512, 512, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, +
SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (memory_access(deviceID, context, queue, num_elements, 1024, 1024, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp new file mode 100644 index 0000000000..0e5d1d12ac --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp @@ -0,0 +1,1319 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +#include +#include + +#include "harness/errorHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/kernelHelpers.h" + +#include "utils.h" +/* Exercises a pair of shared DX9 media surfaces (source and destination) of the given surfaceFormat through OpenCL, once per pass of the deviceWrapper->AdapterNext() loop. Per frame (iterationNum frames, cycling through FRAME_NUM generated buffers): the source surface is filled through LockRect; both surfaces are acquired; the source image is read back and compared with the input; the source image is overwritten with the expected data; the TestFunction kernel is run from the source image to the destination image and the width/height it reports are checked; the destination image is mapped, compared with the input, rewritten with the expected data and unmapped; the surfaces are released; finally the destination surface is locked and compared with the expected data. Most failures are recorded with result.ResultSub and the function returns result.Result(); a surface or image format that is not supported is logged with log_info and returns without recording a failure. On non-_WIN32 builds the first #else branch reached returns TEST_NOT_IMPLEMENTED. NOTE(review): this text appears to have lost its angle-bracket contents (the template parameter list, the static_cast target types and the container element types); T is used below as the element type of the pixel buffers - confirm against the upstream file. NOTE(review): frameIdx is a size_t but is passed to log_error for a %i conversion. */ +template +int other_data_types(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, + unsigned int height, + cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, + TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const float MAX_VALUE = 0.6f; + const std::string PROGRAM_STR = + "__kernel void TestFunction( read_only image2d_t imageIn, write_only " + "image2d_t imageOut, " NL " sampler_t " + "sampler, __global int *imageRes)" NL "{" NL + " int w = get_global_id(0);" NL " int h = get_global_id(1);" NL + " int width = get_image_width(imageIn);" NL + " int height = get_image_height(imageOut);" NL + " float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - " + "0.2f;" NL " float4 color1 = read_imagef(imageIn, sampler, " + "(float2)(w,h)) - 0.2f;" NL + " color0 = (color0 == color1) ?
color0: (float4)(0.5, 0.5, 0.5, " + "0.5);" NL " write_imagef(imageOut, (int2)(w,h), color0);" NL + " if(w == 0 && h == 0)" NL " {" NL " imageRes[0] = width;" NL + " imageRes[1] = height;" NL " }" NL "}"; + + CResult result; + + cl_image_format format; + if (!SurfaceFormatToOCL(surfaceFormat, format)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + cl_int error; + // check if the test can be run on the adapter + if (CL_SUCCESS + != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, + deviceWrapper->Device(), result, + sharedHandle))) + { + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, + (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), + (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + clContextWrapper ctx = clCreateContext( + &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties( + ctx, gDeviceIDdetected, 0, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ?
"yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info( + "Skipping test case, image format is not supported by a device " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + if (!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format)) + { + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by OCL " + "(adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), + sharedHandleStr.c_str()); + return result.Result(); + } + + if (format.image_channel_data_type == CL_HALF_FLOAT) + { + if (DetectFloatToHalfRoundingMode(cmdQueue)) + { + log_error("Unable to detect rounding mode\n"); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } +/* Build FRAME_NUM input buffers and FRAME_NUM expected buffers; the expected ones are generated with the same arguments plus a trailing 0.2f (presumably the offset the kernel subtracts - confirm in DataGenerate). */ + std::vector> bufferIn(FRAME_NUM); + std::vector> bufferExp(FRAME_NUM); + float step = MAX_VALUE / static_cast(FRAME_NUM); + unsigned int planeNum = ChannelNum(surfaceFormat); + for (size_t i = 0; i < FRAME_NUM; ++i) + { + DataGenerate(surfaceFormat, format.image_channel_data_type, + bufferIn[i], width, height, planeNum, step * i, + step * (i + 1)); + DataGenerate(surfaceFormat, format.image_channel_data_type, + bufferExp[i], width, height, planeNum, step * i, + step * (i + 1), 0.2f); + } + + void *objectSrcHandle = 0; + std::auto_ptr surfaceSrc; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, + *deviceWrapper, surfaceSrc, + (sharedHandle == SHARED_HANDLE_ENABLED) ?
true + : false, + &objectSrcHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + void *objectDstHandle = 0; + std::auto_ptr surfaceDst; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, + *deviceWrapper, surfaceDst, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true + : false, + &objectDstHandle)) + { + log_error("Media surface creation failed for %i adapter\n", + deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceSrcInfo; + CD3D9SurfaceWrapper *dx9SurfaceSrc = + (static_cast(surfaceSrc.get())); + surfaceSrcInfo.resource = *dx9SurfaceSrc; + surfaceSrcInfo.shared_handle = objectSrcHandle; + + cl_dx9_surface_info_khr surfaceDstInfo; + CD3D9SurfaceWrapper *dx9SurfaceDst = + (static_cast(surfaceDst.get())); + surfaceDstInfo.resource = *dx9SurfaceDst; + surfaceDstInfo.shared_handle = objectDstHandle; +#else + void *surfaceSrcInfo = 0; + void *surfaceDstInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + // create OCL shared object + clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR( + ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector memObjList; + memObjList.push_back(objectSrcShared); + memObjList.push_back(objectDstShared); + + if (!GetMemObjInfo(objectSrcShared, adapterType,
surfaceSrc, + objectSrcHandle)) + { + log_error("Invalid memory object info\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum, + width * sizeof(T) * planeNum, 0, width, height, 0, 0)) + { + log_error("clGetImageInfo failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + // surface set +#if defined(_WIN32) + D3DLOCKED_RECT rect; + if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0))) + { + log_error("Surface lock failed\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + size_t pitch = rect.Pitch / sizeof(T); + size_t lineSize = width * planeNum * sizeof(T); + T *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(ptr + y * pitch, + &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum], + lineSize); + + (*dx9SurfaceSrc)->UnlockRect(); +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + error = clEnqueueAcquireDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { width, height, 1 }; + + { // read operation + std::vector out(planeNum * width * height, 0); + error = + clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE, + origin, region, 0, 0, &out[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, + out, bufferIn[frameIdx % FRAME_NUM], width, + height, planeNum)) + { + log_error("Frame idx: %i, OCL object is different then " + "expected\n", + frameIdx); +
result.ResultSub(CResult::TEST_FAIL); + } + } + + { // write operation + error = clEnqueueWriteImage( + cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0, + &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // kernel operations + clSamplerWrapper sampler = clCreateSampler( + ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + if (error != CL_SUCCESS) + { + log_error("Unable to create sampler\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t threads[2] = { width, height }; + clProgramWrapper program; + clKernelWrapper kernel; + const char *progPtr = PROGRAM_STR.c_str(); + if (create_single_kernel_helper(ctx, &program, &kernel, 1, + (const char **)&progPtr, + "TestFunction")) + result.ResultSub(CResult::TEST_FAIL); + + error = clSetKernelArg(kernel, 0, sizeof(objectSrcShared), + &(objectSrcShared)); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg(kernel, 1, sizeof(objectDstShared), + &(objectDstShared)); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments"); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t bufferSize = sizeof(cl_int) * 2; + clMemWrapper imageRes = clCreateBuffer( + ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } +/* NOTE(review): unlike arguments 0-2, the result of this clSetKernelArg call is never checked; it is overwritten by the next assignment to error. */ + error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes); + + size_t localThreads[2]; + error = get_max_common_2D_work_group_size(ctx, kernel, threads, + localThreads); + if (error !=
CL_SUCCESS) + { + log_error("Unable to get work group size to use"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = + clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads, + localThreads, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("Unable to execute test kernel"); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector imageResOut(2, 0); + error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0, + bufferSize, &imageResOut[0], 0, + NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (imageResOut[0] != width) + { + log_error("Invalid width value, test = %i, expected = %i\n", + imageResOut[0], width); + result.ResultSub(CResult::TEST_FAIL); + } + + if (imageResOut[1] != height) + { + log_error( + "Invalid height value, test = %i, expected = %i\n", + imageResOut[1], height); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { // map operation + size_t mapOrigin[3] = { 0, 0, 0 }; + size_t mapRegion[3] = { width, height, 1 }; + + std::vector out(width * height * planeNum, 0); + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage( + cmdQueue, objectDstShared, CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion, &rowPitch, + &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } +/* NOTE(review): execution continues here even when the map call above reported an error, so mapPtr is used without being validated. */ + for (size_t y = 0; y < height; ++y) + memcpy(&out[y * width * planeNum], + static_cast(mapPtr) + y * rowPitch / sizeof(T), + width * planeNum * sizeof(T)); + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, + out, bufferIn[frameIdx % FRAME_NUM], width, + height, planeNum)) + { + log_error("Frame idx: %i, Mapped OCL object is different " + "then expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < height; ++y) + memcpy( + static_cast(mapPtr) + y * rowPitch /
sizeof(T), + &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum], + width * planeNum * sizeof(T)); + + error = clEnqueueUnmapMemObject(cmdQueue, objectDstShared, + mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR( + cmdQueue, static_cast(memObjList.size()), + &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n", + IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector out(width * height * planeNum, 0); + // surface get +#if defined(_WIN32) + if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0))) + { + log_error("Surface lock failed\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + pitch = rect.Pitch / sizeof(T); + lineSize = width * planeNum * sizeof(T); + ptr = static_cast(rect.pBits); + for (size_t y = 0; y < height; ++y) + memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize); + + (*dx9SurfaceDst)->UnlockRect(); +#else + return TEST_NOT_IMPLEMENTED; +#endif + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, + bufferExp[frameIdx % FRAME_NUM], width, height, + planeNum)) + { + log_error( + "Frame idx: %i, media object is different then expected\n", + frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (deviceWrapper->Status() != DEVICE_PASS) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", + adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return result.Result(); +} + +int test_other_data_types(cl_device_id deviceID, cl_context context, +
cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + // D3D9 + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 1024, 32, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 32, 1024, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 64, 64, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n"); + 
result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A32B32G32R32F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A16B16G16R16F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A16B16G16R16, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 64, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 16, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_X8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) 
failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 16, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_X8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // D3D9EX + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n"); + 
result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 1024, 32, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 1024, 32, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 32, 1024, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G32R32F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 32, 1024, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G32R32F, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if 
(other_data_types(deviceID, context, queue, num_elements, 10, + 64, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A32B32G32R32F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if 
(other_data_types(deviceID, context, queue, num_elements, 10, + 128, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A32B32G32R32F, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16F, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8B8G8R8, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8B8G8R8, shared 
handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 16, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 16, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8B8G8R8, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 16, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 16, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8R8G8B8, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8R8G8B8, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + // DXVA + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, 
R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, L16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, A8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if 
(other_data_types(deviceID, context, queue, num_elements, 10, + 1024, 32, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 1024, 32, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 32, 1024, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 32, 1024, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 64, 64, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 
256, 256, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A32B32G32R32F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A32B32G32R32F, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error( + "\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16F, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16F, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error( + "\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 
10, + 64, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error( + "\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 64, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16, + SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 128, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 128, 64, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 16, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8B8G8R8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 16, 512, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 512, 16, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 
512, 16, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types(deviceID, context, queue, num_elements, 10, + 256, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8R8G8B8, + SHARED_HANDLE_DISABLED) + != 0) + { + log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (other_data_types( + deviceID, context, queue, num_elements, 10, 256, 256, + CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) + != 0) + { + log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp new file mode 100644 index 0000000000..87eb13c3ca --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp @@ -0,0 +1,1664 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +#include "harness/errorHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/rounding_mode.h" + +#include + +#include + +static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode; + + +CResult::CResult(): _result(TEST_PASS), _resultLast(TEST_NORESULT) {} + +CResult::~CResult() {} + +CResult::TTestResult CResult::ResultLast() const { return _resultLast; } + +int CResult::Result() const +{ + switch (_result) + { + case TEST_NORESULT: + case TEST_NOTSUPPORTED: + case TEST_PASS: return 0; break; + case TEST_FAIL: return 1; break; + case TEST_ERROR: return 2; break; + default: return -1; break; + } +} + +void CResult::ResultSub(TTestResult result) +{ + _resultLast = result; + if (static_cast(result) > static_cast(_result)) _result = result; +} + +void FunctionContextCreateToString(TContextFuncType contextCreateFunction, + std::string &contextFunction) +{ + switch (contextCreateFunction) + { + case CONTEXT_CREATE_DEFAULT: contextFunction = "CreateContext"; break; + case CONTEXT_CREATE_FROM_TYPE: + contextFunction = "CreateContextFromType"; + break; + default: + contextFunction = "Unknown"; + log_error("FunctionContextCreateToString(): Unknown create " + "function enum!"); + break; + } +} + +void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, + std::string &adapter) +{ + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: adapter = "D3D9"; break; + case CL_ADAPTER_D3D9EX_KHR: adapter = "D3D9EX"; break; + case CL_ADAPTER_DXVA_KHR: adapter = "DXVA"; break; + default: + adapter = "Unknown"; + log_error("AdapterToString(): Unknown adapter type!"); + break; + } +} + +cl_context_info +AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType) +{ + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: return CL_CONTEXT_ADAPTER_D3D9_KHR; break; + case CL_ADAPTER_D3D9EX_KHR: return CL_CONTEXT_ADAPTER_D3D9EX_KHR; break; + case CL_ADAPTER_DXVA_KHR: return CL_CONTEXT_ADAPTER_DXVA_KHR; break; + default: + 
log_error("AdapterTypeToContextInfo(): Unknown adapter type!"); + return 0; + break; + } +} + +void YUVGenerateNV12(std::vector &yuv, unsigned int width, + unsigned int height, cl_uchar valueMin, cl_uchar valueMax, + double valueAdd) +{ + yuv.clear(); + yuv.resize(width * height * 3 / 2, 0); + + double min = static_cast(valueMin); + double max = static_cast(valueMax); + double range = 255; + double add = static_cast(valueAdd * range); + double stepX = (max - min) / static_cast(width); + double stepY = (max - min) / static_cast(height); + + // generate Y plane + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width; + double valueYPlane0 = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueXPlane0 = static_cast(stepX * j); + yuv.at(offset + j) = static_cast( + min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); + } + } + + // generate UV planes + for (unsigned int i = 0; i < height / 2; ++i) + { + unsigned int offset = width * height + i * width; + double valueYPlane1 = static_cast(stepY * i); + double valueYPlane2 = static_cast(stepY * (height / 2 + i)); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane1 = static_cast(stepX * j); + double valueXPlane2 = static_cast(stepX * (width / 2 + j)); + + yuv.at(offset + j * 2) = static_cast( + min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); + yuv.at(offset + j * 2 + 1) = static_cast( + min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); + } + } +} + +void YUVGenerateYV12(std::vector &yuv, unsigned int width, + unsigned int height, cl_uchar valueMin, cl_uchar valueMax, + double valueAdd /*= 0.0*/) +{ + yuv.clear(); + yuv.resize(width * height * 3 / 2, 0); + + double min = static_cast(valueMin); + double max = static_cast(valueMax); + double range = 255; + double add = static_cast(valueAdd * range); + double stepX = (max - min) / static_cast(width); + double stepY = (max - min) / static_cast(height); + + unsigned offset = 0; + + // generate Y plane 
+ for (unsigned int i = 0; i < height; ++i) + { + unsigned int plane0Offset = offset + i * width; + double valueYPlane0 = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueXPlane0 = static_cast(stepX * j); + yuv.at(plane0Offset + j) = static_cast( + min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); + } + } + + // generate V plane + offset += width * height; + for (unsigned int i = 0; i < height / 2; ++i) + { + unsigned int plane1Offset = offset + i * width / 2; + double valueYPlane1 = static_cast(stepY * i); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane1 = static_cast(stepX * j); + yuv.at(plane1Offset + j) = static_cast( + min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); + } + } + + // generate U plane + offset += width * height / 4; + for (unsigned int i = 0; i < height / 2; ++i) + { + unsigned int plane2Offset = offset + i * width / 2; + double valueYPlane2 = static_cast(stepY * (height / 2 + i)); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane2 = static_cast(stepX * j); + yuv.at(plane2Offset + j) = static_cast( + min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); + } + } +} + + +bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector &yuv, + unsigned int width, unsigned int height, cl_uchar valueMin, + cl_uchar valueMax, double valueAdd /*= 0.0*/) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd); + break; + case SURFACE_FORMAT_YV12: + YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd); + break; + default: + log_error("YUVGenerate(): Invalid surface type\n"); + return false; + break; + } + + return true; +} + +bool YUVSurfaceSetNV12(std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 
0))) + { + log_error("YUVSurfaceSetNV12(): Surface lock failed\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t lineSize = width * sizeof(cl_uchar); + cl_uchar *ptr = static_cast(rect.pBits); + for (size_t y = 0; y < height; ++y) + memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize); + + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + height * pitch + y * pitch, + &yuv.at(width * height + y * width), lineSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceSetYV12(std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceSetYV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t pitchHalf = pitch / 2; + size_t lineSize = width * sizeof(cl_uchar); + size_t lineHalfSize = lineSize / 2; + size_t surfaceOffset = 0; + size_t yuvOffset = 0; + cl_uchar *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width), + lineSize); + + surfaceOffset += height * pitch; + yuvOffset += width * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + surfaceOffset + y * pitchHalf, + &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); + + surfaceOffset += pitchHalf * height / 2; + yuvOffset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + surfaceOffset + y * pitchHalf, + &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, + std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height) +{ + switch 
(surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if (!YUVSurfaceSetNV12(surface, yuv, width, height)) return false; + break; + case SURFACE_FORMAT_YV12: + if (!YUVSurfaceSetYV12(surface, yuv, width, height)) return false; + break; + default: + log_error("YUVSurfaceSet(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +bool YUVSurfaceGetNV12(std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceGetNV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t lineSize = width * sizeof(cl_uchar); + cl_uchar *ptr = static_cast(rect.pBits); + size_t yuvOffset = 0; + size_t surfaceOffset = 0; + for (size_t y = 0; y < height; ++y) + memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize); + + yuvOffset += width * height; + surfaceOffset += pitch * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, + lineSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceGetYV12(std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceGetYV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t pitchHalf = pitch / 2; + size_t lineSize = width * sizeof(cl_uchar); + size_t lineHalfSize = lineSize / 2; + size_t surfaceOffset = 0; + size_t yuvOffset = 0; + cl_uchar *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(&yuv.at(yuvOffset + y * 
width), ptr + surfaceOffset + y * pitch, + lineSize); + + surfaceOffset += pitch * height; + yuvOffset += width * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * lineHalfSize), + ptr + surfaceOffset + y * pitchHalf, lineHalfSize); + + surfaceOffset += pitchHalf * height / 2; + yuvOffset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * lineHalfSize), + ptr + surfaceOffset + y * pitchHalf, lineHalfSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, + std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if (!YUVSurfaceGetNV12(surface, yuv, width, height)) return false; + break; + case SURFACE_FORMAT_YV12: + if (!YUVSurfaceGetYV12(surface, yuv, width, height)) return false; + break; + default: + log_error("YUVSurfaceGet(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +bool YUVCompareNV12(const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height) +{ + // plane 0 verification + size_t offset = 0; + for (size_t y = 0; y < height; ++y) + { + size_t plane0Offset = offset + width * y; + for (size_t x = 0; x < width; ++x) + { + if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x]) + { + log_error("Plane 0 (Y) is different than expected, reference " + "value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], + x, y); + return false; + } + } + } + + // plane 1 and 2 verification + offset += width * height; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane12Offset = offset + width * y; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane12Offset + 2 * x) + != yuvRef.at(plane12Offset + 2 * x)) + { + log_error("Plane 1 (U) is different than expected, reference " + 
"value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane12Offset + 2 * x], + yuvTest[plane12Offset + 2 * x], x, y); + return false; + } + + if (yuvTest.at(plane12Offset + 2 * x + 1) + != yuvRef.at(plane12Offset + 2 * x + 1)) + { + log_error("Plane 2 (V) is different than expected, reference " + "value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane12Offset + 2 * x + 1], + yuvTest[plane12Offset + 2 * x + 1], x, y); + return false; + } + } + } + + return true; +} + +bool YUVCompareYV12(const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height) +{ + // plane 0 verification + size_t offset = 0; + for (size_t y = 0; y < height; ++y) + { + size_t plane0Offset = width * y; + for (size_t x = 0; x < width; ++x) + { + if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x)) + { + log_error("Plane 0 (Y) is different than expected, reference " + "value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], + x, y); + return false; + } + } + } + + // plane 1 verification + offset += width * height; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane1Offset = offset + width * y / 2; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x)) + { + log_error("Plane 1 (V) is different than expected, reference " + "value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], + x, y); + return false; + } + } + } + + // plane 2 verification + offset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane2Offset = offset + width * y / 2; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x)) + { + log_error("Plane 2 (U) is different than expected, reference " + "value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], + x, y); + return false; + } + } + } + + return 
true; +} + +bool YUVCompare(TSurfaceFormat surfaceFormat, + const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if (!YUVCompareNV12(yuvTest, yuvRef, width, height)) + { + log_error("OCL object is different than expected!\n"); + return false; + } + break; + case SURFACE_FORMAT_YV12: + if (!YUVCompareYV12(yuvTest, yuvRef, width, height)) + { + log_error("OCL object is different than expected!\n"); + return false; + } + break; + default: + log_error("YUVCompare(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, + float add /*= 0.0f*/) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) / static_cast(height); + double valueAdd = static_cast(add); + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j); + switch (channelNum) + { + case 1: + data.push_back(static_cast(valueMin + valueX / 2 + + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back( + static_cast(valueMin + valueX + valueAdd)); + data.push_back( + static_cast(valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back( + static_cast(valueMin + valueX + valueAdd)); + data.push_back( + static_cast(valueMin + valueY + valueAdd)); + data.push_back( + static_cast(valueMin + valueX / 2 + valueAdd)); + data.push_back( + static_cast(valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + 
return; + break; + } + } + } +} + +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, + float add /*= 0.0f*/) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) / static_cast(height); + + switch (type) + { + case CL_HALF_FLOAT: { + double valueAdd = static_cast(add); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j); + switch (channelNum) + { + case 1: + data.push_back(convert_float_to_half( + static_cast(valueMin + valueX / 2 + + valueY / 2 + valueAdd))); + break; + case 2: + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueX + valueAdd))); + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueY + valueAdd))); + break; + case 4: + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueX + valueAdd))); + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueY + valueAdd))); + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueX / 2 + valueAdd))); + data.push_back( + convert_float_to_half(static_cast( + valueMin + valueY / 2 + valueAdd))); + break; + default: + log_error( + "DataGenerate(): invalid channel number!"); + return; + break; + } + } + } + break; + } + case CL_UNORM_INT16: { + double range = 65535; + double valueAdd = static_cast(add * range); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i * range); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j * range); + switch (channelNum) + { + case 1: + 
data.push_back(static_cast( + valueMin + valueX / 2 + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back(static_cast( + valueMin + valueX + valueAdd)); + data.push_back(static_cast( + valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back(static_cast( + valueMin + valueX + valueAdd)); + data.push_back(static_cast( + valueMin + valueY + valueAdd)); + data.push_back(static_cast( + valueMin + valueX / 2 + valueAdd)); + data.push_back(static_cast( + valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error( + "DataGenerate(): invalid channel number!"); + return; + break; + } + } + } + } + break; + default: + log_error("DataGenerate(): unknown data type!"); + return; + break; + } +} + +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, + float add /*= 0.0f*/) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) / static_cast(height); + + double range = 255; + double valueAdd = static_cast(add * range); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i * range); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j * range); + switch (channelNum) + { + case 1: + data.push_back(static_cast( + valueMin + valueX / 2 + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back( + static_cast(valueMin + valueX + valueAdd)); + data.push_back( + static_cast(valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back( + static_cast(valueMin + valueX + valueAdd)); + data.push_back( + static_cast(valueMin + valueY + valueAdd)); + data.push_back(static_cast(valueMin + valueX / 2 + + valueAdd)); + if (surfaceFormat == 
SURFACE_FORMAT_X8R8G8B8) + data.push_back(static_cast(0xff)); + else + data.push_back(static_cast( + valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + return; + break; + } + } + } +} + +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int channelNum) +{ + float epsilon = 0.000001f; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for (unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) + { + if (abs(dataTest.at(offset + j * channelNum + planeIdx) + - dataExp.at(offset + j * channelNum + planeIdx)) + > epsilon) + { + log_error( + "Tested image is different than reference (x,y,plane) " + "= (%i,%i,%i), test value = %f, expected value = %f\n", + j, i, planeIdx, + dataTest[offset + j * channelNum + planeIdx], + dataExp[offset + j * channelNum + planeIdx]); + return false; + } + } + } + } + + return true; +} + +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int channelNum) +{ + switch (type) + { + case CL_HALF_FLOAT: { + float epsilon = 0.001f; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for (unsigned planeIdx = 0; planeIdx < channelNum; + ++planeIdx) + { + float test = cl_half_to_float( + dataTest.at(offset + j * channelNum + planeIdx)); + float ref = cl_half_to_float( + dataExp.at(offset + j * channelNum + planeIdx)); + if (abs(test - ref) > epsilon) + { + log_error("Tested image is different than " + "reference (x,y,plane) = " + "(%i,%i,%i), test value = %f, expected " + "value = %f\n", + j, i, planeIdx, test, ref); + return false; + } + } + 
} + } + } + break; + case CL_UNORM_INT16: { + cl_ushort epsilon = 1; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for (unsigned planeIdx = 0; planeIdx < channelNum; + ++planeIdx) + { + cl_ushort test = + dataTest.at(offset + j * channelNum + planeIdx); + cl_ushort ref = + dataExp.at(offset + j * channelNum + planeIdx); + if (abs(test - ref) > epsilon) + { + log_error("Tested image is different than " + "reference (x,y,plane) = (%i,%i,%i), " + "test value = %i, expected value = %i\n", + j, i, planeIdx, test, ref); + return false; + } + } + } + } + } + break; + default: + log_error("DataCompare(): Invalid data format!"); + return false; + break; + } + + return true; +} + +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int planeNum) +{ + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * planeNum; + for (unsigned int j = 0; j < width; ++j) + { + for (unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx) + { + if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3) + continue; + + cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx); + cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx); + if (test != ref) + { + log_error( + "Tested image is different than reference (x,y,plane) " + "= (%i,%i,%i), test value = %i, expected value = %i\n", + j, i, planeIdx, test, ref); + return false; + } + } + } + } + + return true; +} + +bool GetImageInfo(cl_mem object, cl_image_format formatExp, + size_t elementSizeExp, size_t rowPitchExp, + size_t slicePitchExp, size_t widthExp, size_t heightExp, + size_t depthExp, unsigned int planeExp) +{ + bool result = true; + + cl_image_format format; + if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), + &format, 0) + != CL_SUCCESS) + { + 
log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n"); + result = false; + } + + if (formatExp.image_channel_order != format.image_channel_order + || formatExp.image_channel_data_type != format.image_channel_data_type) + { + log_error("Value of CL_IMAGE_FORMAT is different than expected\n"); + result = false; + } + + size_t elementSize = 0; + if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), + &elementSize, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n"); + result = false; + } + + if (elementSizeExp != elementSize) + { + log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected " + "(size: %i, exp size: %i)\n", + elementSize, elementSizeExp); + result = false; + } + + size_t rowPitch = 0; + if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n"); + result = false; + } + + if ((rowPitchExp == 0 && rowPitchExp != rowPitch) + || (rowPitchExp > 0 && rowPitchExp > rowPitch)) + { + log_error("Value of CL_IMAGE_ROW_PITCH is different than expected " + "(size: %i, exp size: %i)\n", + rowPitch, rowPitchExp); + result = false; + } + + size_t slicePitch = 0; + if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), + &slicePitch, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n"); + result = false; + } + + if ((slicePitchExp == 0 && slicePitchExp != slicePitch) + || (slicePitchExp > 0 && slicePitchExp > slicePitch)) + { + log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected " + "(size: %i, exp size: %i)\n", + slicePitch, slicePitchExp); + result = false; + } + + size_t width = 0; + if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n"); + result = false; + } + + if (widthExp != width) + { + log_error("Value of CL_IMAGE_WIDTH is different than expected (size: " + "%i, 
exp size: %i)\n", + width, widthExp); + result = false; + } + + size_t height = 0; + if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n"); + result = false; + } + + if (heightExp != height) + { + log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: " + "%i, exp size: %i)\n", + height, heightExp); + result = false; + } + + size_t depth = 0; + if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n"); + result = false; + } + + if (depthExp != depth) + { + log_error("Value of CL_IMAGE_DEPTH is different than expected (size: " + "%i, exp size: %i)\n", + depth, depthExp); + result = false; + } + + unsigned int plane = 99; + size_t paramSize = 0; + if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, + sizeof(unsigned int), &plane, ¶mSize) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n"); + result = false; + } + + if (planeExp != plane) + { + log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than " + "expected (plane: %i, exp plane: %i)\n", + plane, planeExp); + result = false; + } + + return result; +} + +bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, + std::auto_ptr &surface, + void *shareHandleExp) +{ + bool result = true; + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: + case CL_ADAPTER_D3D9EX_KHR: + case CL_ADAPTER_DXVA_KHR: { +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; +#else + void *surfaceInfo = 0; + return false; +#endif + size_t paramSize = 0; + if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, + sizeof(surfaceInfo), &surfaceInfo, + ¶mSize) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) " + "failed\n"); + result = false; + } + +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3d9Surface = + 
static_cast(surface.get()); + if (*d3d9Surface != surfaceInfo.resource) + { + log_error( + "Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); + result = false; + } + + if (shareHandleExp != surfaceInfo.shared_handle) + { + log_error("Invalid shared handle for " + "CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); + result = false; + } +#else + return false; +#endif + + if (paramSize != sizeof(surfaceInfo)) + { + log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter " + "size: %i, expected: %i\n", + paramSize, sizeof(surfaceInfo)); + result = false; + } + + paramSize = 0; + cl_dx9_media_adapter_type_khr mediaAdapterType; + if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, + sizeof(mediaAdapterType), &mediaAdapterType, + ¶mSize) + != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) " + "failed\n"); + result = false; + } + + if (adapterType != mediaAdapterType) + { + log_error("Invalid media adapter type for " + "CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n"); + result = false; + } + + if (paramSize != sizeof(mediaAdapterType)) + { + log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter " + "size: %i, expected: %i\n", + paramSize, sizeof(mediaAdapterType)); + result = false; + } + } + break; + default: + log_error("GetMemObjInfo(): Unknown adapter type!\n"); + return false; + break; + } + + return result; +} + +bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, + const std::vector &memObjList, unsigned int width, + unsigned int height, + std::auto_ptr &surface, + void *sharedHandle) +{ + if (memObjList.size() != 2 && memObjList.size() != 3) + { + log_error("ImageInfoVerify(): Invalid object list parameter\n"); + return false; + } + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = CL_R; + + // plane 0 verification + if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar), 0, width, height, 0, 0)) 
+ { + log_error("clGetImageInfo failed\n"); + return false; + } + + switch (memObjList.size()) + { + case 2: { + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = CL_RG; + if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2, + width * sizeof(cl_uchar), 0, width / 2, + height / 2, 0, 1)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + } + break; + case 3: { + if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar) / 2, 0, width / 2, + height / 2, 0, 1)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + + if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar) / 2, 0, width / 2, + height / 2, 0, 2)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + } + break; + default: + log_error("ImageInfoVerify(): Invalid object list parameter\n"); + return false; + break; + } + + for (size_t i = 0; i < memObjList.size(); ++i) + { + if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle)) + { + log_error("clGetMemObjInfo(%i) failed\n", i); + return false; + } + } + + return true; +} + +bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, + const cl_image_format imageFormatCheck) +{ + cl_uint imageFormatsNum = 0; + cl_int error = clGetSupportedImageFormats( + context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum); + if (error != CL_SUCCESS) + { + log_error("clGetSupportedImageFormats failed\n"); + return false; + } + + if (imageFormatsNum < 1) + { + log_error("Invalid image format number returned by " + "clGetSupportedImageFormats\n"); + return false; + } + + std::vector imageFormats(imageFormatsNum); + error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, + imageFormatsNum, &imageFormats[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetSupportedImageFormats failed\n"); + return false; + } + + for (cl_uint i = 0; i < imageFormatsNum; ++i) + { 
+ if (imageFormats[i].image_channel_data_type + == imageFormatCheck.image_channel_data_type + && imageFormats[i].image_channel_order + == imageFormatCheck.image_channel_order) + { + return true; + } + } + + return false; +} + +unsigned int ChannelNum(TSurfaceFormat surfaceFormat) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_R32F: + case SURFACE_FORMAT_R16F: + case SURFACE_FORMAT_L16: + case SURFACE_FORMAT_A8: + case SURFACE_FORMAT_L8: return 1; break; + case SURFACE_FORMAT_G32R32F: + case SURFACE_FORMAT_G16R16F: + case SURFACE_FORMAT_G16R16: + case SURFACE_FORMAT_A8L8: return 2; break; + case SURFACE_FORMAT_NV12: + case SURFACE_FORMAT_YV12: return 3; break; + case SURFACE_FORMAT_A32B32G32R32F: + case SURFACE_FORMAT_A16B16G16R16F: + case SURFACE_FORMAT_A16B16G16R16: + case SURFACE_FORMAT_A8B8G8R8: + case SURFACE_FORMAT_X8B8G8R8: + case SURFACE_FORMAT_A8R8G8B8: + case SURFACE_FORMAT_X8R8G8B8: return 4; break; + default: + log_error("ChannelNum(): unknown surface format!\n"); + return 0; + break; + } +} + +unsigned int PlanesNum(TSurfaceFormat surfaceFormat) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_R32F: + case SURFACE_FORMAT_R16F: + case SURFACE_FORMAT_L16: + case SURFACE_FORMAT_A8: + case SURFACE_FORMAT_L8: + case SURFACE_FORMAT_G32R32F: + case SURFACE_FORMAT_G16R16F: + case SURFACE_FORMAT_G16R16: + case SURFACE_FORMAT_A8L8: + case SURFACE_FORMAT_A32B32G32R32F: + case SURFACE_FORMAT_A16B16G16R16F: + case SURFACE_FORMAT_A16B16G16R16: + case SURFACE_FORMAT_A8B8G8R8: + case SURFACE_FORMAT_X8B8G8R8: + case SURFACE_FORMAT_A8R8G8B8: + case SURFACE_FORMAT_X8R8G8B8: return 1; break; + case SURFACE_FORMAT_NV12: return 2; break; + case SURFACE_FORMAT_YV12: return 3; break; + default: + log_error("PlanesNum(): unknown surface format!\n"); + return 0; + break; + } +} + +#if defined(_WIN32) +D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_R32F: return D3DFMT_R32F; break; + case 
SURFACE_FORMAT_R16F: return D3DFMT_R16F; break; + case SURFACE_FORMAT_L16: return D3DFMT_L16; break; + case SURFACE_FORMAT_A8: return D3DFMT_A8; break; + case SURFACE_FORMAT_L8: return D3DFMT_L8; break; + case SURFACE_FORMAT_G32R32F: return D3DFMT_G32R32F; break; + case SURFACE_FORMAT_G16R16F: return D3DFMT_G16R16F; break; + case SURFACE_FORMAT_G16R16: return D3DFMT_G16R16; break; + case SURFACE_FORMAT_A8L8: return D3DFMT_A8L8; break; + case SURFACE_FORMAT_A32B32G32R32F: return D3DFMT_A32B32G32R32F; break; + case SURFACE_FORMAT_A16B16G16R16F: return D3DFMT_A16B16G16R16F; break; + case SURFACE_FORMAT_A16B16G16R16: return D3DFMT_A16B16G16R16; break; + case SURFACE_FORMAT_A8B8G8R8: return D3DFMT_A8B8G8R8; break; + case SURFACE_FORMAT_X8B8G8R8: return D3DFMT_X8B8G8R8; break; + case SURFACE_FORMAT_A8R8G8B8: return D3DFMT_A8R8G8B8; break; + case SURFACE_FORMAT_X8R8G8B8: return D3DFMT_X8R8G8B8; break; + case SURFACE_FORMAT_NV12: + return static_cast(MAKEFOURCC('N', 'V', '1', '2')); + break; + case SURFACE_FORMAT_YV12: + return static_cast(MAKEFOURCC('Y', 'V', '1', '2')); + break; + default: + log_error("SurfaceFormatToD3D(): unknown surface format!\n"); + return D3DFMT_R32F; + break; + } +} +#endif + +bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, + std::auto_ptr &device) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: + device = std::auto_ptr(new CD3D9Wrapper()); + break; + case CL_ADAPTER_D3D9EX_KHR: + device = std::auto_ptr(new CD3D9ExWrapper()); + break; + case CL_ADAPTER_DXVA_KHR: + device = std::auto_ptr(new CDXVAWrapper()); + break; +#endif + default: + log_error("DeviceCreate(): Unknown adapter type!\n"); + return false; + break; + } + + return device->Status(); +} + +bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, + const CDeviceWrapper &device, + TSurfaceFormat surfaceFormat) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: + case CL_ADAPTER_D3D9EX_KHR: + case 
CL_ADAPTER_DXVA_KHR: { + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3D9 d3d9 = static_cast(device.D3D()); + D3DDISPLAYMODE d3ddm; + d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm); + + if (FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, d3ddm.Format, 0, + D3DRTYPE_SURFACE, d3dFormat))) + return false; + } + break; +#endif + default: + log_error("SurfaceFormatCheck(): Unknown adapter type!\n"); + return false; + break; + } + + return true; +} + +bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_R32F: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_R16F: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + case SURFACE_FORMAT_L16: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8: + format.image_channel_order = CL_A; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_L8: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_G32R32F: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_G16R16F: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + case SURFACE_FORMAT_G16R16: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8L8: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_A32B32G32R32F: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_A16B16G16R16F: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + 
case SURFACE_FORMAT_A16B16G16R16: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8B8G8R8: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_X8B8G8R8: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_A8R8G8B8: + format.image_channel_order = CL_BGRA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_X8R8G8B8: + format.image_channel_order = CL_BGRA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_NV12: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_YV12: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + default: + log_error("SurfaceFormatToOCL(): Unknown surface format!\n"); + return false; + break; + } + + return true; +} + +void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_R32F: str = "R32F"; break; + case SURFACE_FORMAT_R16F: str = "R16F"; break; + case SURFACE_FORMAT_L16: str = "L16"; break; + case SURFACE_FORMAT_A8: str = "A8"; break; + case SURFACE_FORMAT_L8: str = "L8"; break; + case SURFACE_FORMAT_G32R32F: str = "G32R32F"; break; + case SURFACE_FORMAT_G16R16F: str = "G16R16F"; break; + case SURFACE_FORMAT_G16R16: str = "G16R16"; break; + case SURFACE_FORMAT_A8L8: str = "A8L8"; break; + case SURFACE_FORMAT_A32B32G32R32F: str = "A32B32G32R32F"; break; + case SURFACE_FORMAT_A16B16G16R16F: str = "A16B16G16R16F"; break; + case SURFACE_FORMAT_A16B16G16R16: str = "A16B16G16R16"; break; + case SURFACE_FORMAT_A8B8G8R8: str = "A8B8G8R8"; break; + case SURFACE_FORMAT_X8B8G8R8: str = "X8B8G8R8"; break; + case SURFACE_FORMAT_A8R8G8B8: str = "A8R8G8B8"; break; + case SURFACE_FORMAT_X8R8G8B8: str = 
"X8R8G8B8"; break; + case SURFACE_FORMAT_NV12: str = "NV12"; break; + case SURFACE_FORMAT_YV12: str = "YV12"; break; + default: + log_error("SurfaceFormatToString(): unknown surface format!\n"); + str = "unknown"; + break; + } +} + +bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, + unsigned int width, unsigned int height, + TSurfaceFormat surfaceFormat, CDeviceWrapper &device, + std::auto_ptr &surface, + bool sharedHandle, void **objectSharedHandle) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: { + surface = + std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device(); + hr = d3d9Device->CreateOffscreenPlainSurface( + width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), + sharedHandle ? objectSharedHandle : 0); + + if (FAILED(hr)) + { + log_error("CreateOffscreenPlainSurface failed\n"); + return false; + } + } + break; + case CL_ADAPTER_D3D9EX_KHR: { + surface = + std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3DDEVICE9EX d3d9ExDevice = + (LPDIRECT3DDEVICE9EX)device.Device(); + hr = d3d9ExDevice->CreateOffscreenPlainSurface( + width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), + sharedHandle ? 
objectSharedHandle : 0); + + if (FAILED(hr)) + { + log_error("CreateOffscreenPlainSurface failed\n"); + return false; + } + } + break; + case CL_ADAPTER_DXVA_KHR: { + surface = + std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = + static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device(); + hr = dxvaDevice->CreateVideoSurface( + width, height, d3dFormat, D3DPOOL_DEFAULT, 0, + DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface), + sharedHandle ? objectSharedHandle : 0); + + if (FAILED(hr)) + { + log_error("CreateVideoSurface failed\n"); + return false; + } + } + break; +#endif + default: + log_error("MediaSurfaceCreate(): Unknown adapter type!\n"); + return false; + break; + } + + return true; +} + +cl_int deviceExistForCLTest( + cl_platform_id platform, cl_dx9_media_adapter_type_khr media_adapters_type, + void *media_adapters, CResult &result, + TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/ +) +{ + cl_int _error; + cl_uint devicesAllNum = 0; + std::string sharedHandleStr = + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
"yes" : "no"; + std::string adapterStr; + AdapterToString(media_adapters_type, adapterStr); + + _error = clGetDeviceIDsFromDX9MediaAdapterKHR( + platform, 1, &media_adapters_type, &media_adapters, + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); + + if (_error != CL_SUCCESS) + { + if (_error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", + IGetErrorString(_error)); + result.ResultSub(CResult::TEST_ERROR); + } + else + { + log_info("Skipping test case, device type is not supported by a " + "device (adapter type: %s, shared handle: %s)\n", + adapterStr.c_str(), sharedHandleStr.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return _error; +} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h new file mode 100644 index 0000000000..56c0fc2c4d --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h @@ -0,0 +1,215 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __UTILS_KHR_MEDIA_H +#define __UTILS_KHR_MEDIA_H + +#include +#include +#include +#include +#include "wrappers.h" +#include "CL/cl_dx9_media_sharing.h" + +#include "harness/typeWrappers.h" + + +extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn + clGetDeviceIDsFromDX9MediaAdapterKHR; +extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; +extern clEnqueueAcquireDX9MediaSurfacesKHR_fn + clEnqueueAcquireDX9MediaSurfacesKHR; +extern clEnqueueReleaseDX9MediaSurfacesKHR_fn + clEnqueueReleaseDX9MediaSurfacesKHR; + +extern cl_platform_id gPlatformIDdetected; +extern cl_device_id gDeviceIDdetected; +extern cl_device_type gDeviceTypeSelected; + +#define NL "\n" +#define TEST_NOT_IMPLEMENTED -1 +#define TEST_NOT_SUPPORTED -2 + +enum TSurfaceFormat +{ + SURFACE_FORMAT_NV12, + SURFACE_FORMAT_YV12, + SURFACE_FORMAT_R32F, + SURFACE_FORMAT_R16F, + SURFACE_FORMAT_L16, + SURFACE_FORMAT_A8, + SURFACE_FORMAT_L8, + SURFACE_FORMAT_G32R32F, + SURFACE_FORMAT_G16R16F, + SURFACE_FORMAT_G16R16, + SURFACE_FORMAT_A8L8, + SURFACE_FORMAT_A32B32G32R32F, + SURFACE_FORMAT_A16B16G16R16F, + SURFACE_FORMAT_A16B16G16R16, + SURFACE_FORMAT_A8B8G8R8, + SURFACE_FORMAT_X8B8G8R8, + SURFACE_FORMAT_A8R8G8B8, + SURFACE_FORMAT_X8R8G8B8, +}; + +enum TContextFuncType +{ + CONTEXT_CREATE_DEFAULT, + CONTEXT_CREATE_FROM_TYPE, +}; + +enum TSharedHandleType +{ + SHARED_HANDLE_ENABLED, + SHARED_HANDLE_DISABLED, +}; + +class CResult { +public: + enum TTestResult + { + TEST_NORESULT, + TEST_NOTSUPPORTED, + TEST_PASS, + TEST_FAIL, + TEST_ERROR, + }; + + CResult(); + ~CResult(); + + void ResultSub(TTestResult result); + TTestResult ResultLast() const; + int Result() const; + +private: + TTestResult _result; + TTestResult _resultLast; +}; + +void FunctionContextCreateToString(TContextFuncType contextCreateFunction, + std::string &contextFunction); +void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, + std::string &adapter); +cl_context_info 
+AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType); + +// YUV utils +void YUVGenerateNV12(std::vector &yuv, unsigned int width, + unsigned int height, cl_uchar valueMin, cl_uchar valueMax, + double valueAdd = 0.0); +void YUVGenerateYV12(std::vector &yuv, unsigned int width, + unsigned int height, cl_uchar valueMin, cl_uchar valueMax, + double valueAdd = 0.0); +bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector &yuv, + unsigned int width, unsigned int height, cl_uchar valueMin, + cl_uchar valueMax, double valueAdd = 0.0); +bool YUVSurfaceSetNV12(std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVSurfaceSetYV12(std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, + std::auto_ptr &surface, + const std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVSurfaceGetNV12(std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVSurfaceGetYV12(std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, + std::auto_ptr &surface, + std::vector &yuv, unsigned int width, + unsigned int height); +bool YUVCompareNV12(const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height); +bool YUVCompareYV12(const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height); +bool YUVCompare(TSurfaceFormat surfaceFormat, + const std::vector &yuvTest, + const std::vector &yuvRef, unsigned int width, + unsigned int height); + +// other types utils +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +void DataGenerate(TSurfaceFormat surfaceFormat, 
cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, + std::vector &data, unsigned int width, + unsigned int height, unsigned int channelNum, + float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int channelNum); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int channelNum); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, + const std::vector &dataTest, + const std::vector &dataExp, unsigned int width, + unsigned int height, unsigned int channelNum); + +bool GetImageInfo(cl_mem object, cl_image_format formatExp, + size_t elementSizeExp, size_t rowPitchExp, + size_t slicePitchExp, size_t widthExp, size_t heightExp, + size_t depthExp, unsigned int planeExp); +bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, + std::auto_ptr &surface, + void *shareHandleExp); +bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, + const std::vector &memObjList, unsigned int width, + unsigned int height, + std::auto_ptr &surface, + void *sharedHandle); +bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, + const cl_image_format imageFormatCheck); +unsigned int ChannelNum(TSurfaceFormat surfaceFormat); +unsigned int PlanesNum(TSurfaceFormat surfaceFormat); + +#if defined(_WIN32) +D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat); +#endif + +bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, + std::auto_ptr &device); +bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, + 
const CDeviceWrapper &device, + TSurfaceFormat surfaceFormat); +bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format); +void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str); +bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, + unsigned int width, unsigned int height, + TSurfaceFormat surfaceFormat, CDeviceWrapper &device, + std::auto_ptr &surface, + bool sharedHandle, void **objectSharedHandle); + +cl_int +deviceExistForCLTest(cl_platform_id platform, + cl_dx9_media_adapter_type_khr media_adapters_type, + void *media_adapters, CResult &result, + TSharedHandleType sharedHandle = SHARED_HANDLE_DISABLED); +#endif // __UTILS_KHR_MEDIA_H diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp new file mode 100644 index 0000000000..e156584e72 --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp @@ -0,0 +1,463 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "wrappers.h" +#include "harness/errorHelpers.h" + +LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" ); +const int CDeviceWrapper::WINDOW_WIDTH = 256; +const int CDeviceWrapper::WINDOW_HEIGHT = 256; +CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType = + CDeviceWrapper::ACCELERATION_HW; + +#if defined(_WIN32) +const D3DFORMAT CDXVAWrapper::RENDER_TARGET_FORMAT = D3DFMT_X8R8G8B8; +const D3DFORMAT CDXVAWrapper::VIDEO_FORMAT = D3DFMT_X8R8G8B8; +const unsigned int CDXVAWrapper::VIDEO_FPS = 60; +#endif + +#if defined(_WIN32) +static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) +{ + switch (msg) + { + case WM_DESTROY: PostQuitMessage(0); return 0; + case WM_PAINT: ValidateRect(hWnd, 0); return 0; + default: break; + } + + return DefWindowProc(hWnd, msg, wParam, lParam); +} +#endif + +CDeviceWrapper::CDeviceWrapper() +#if defined(_WIN32) + : _hInstance(NULL), _hWnd(NULL) +#endif +{} + +void CDeviceWrapper::WindowInit() +{ +#if defined(_WIN32) + _hInstance = GetModuleHandle(NULL); + static WNDCLASSEX wc = { + sizeof(WNDCLASSEX), CS_CLASSDC, WndProc, 0L, 0L, + _hInstance, NULL, NULL, NULL, NULL, + WINDOW_TITLE, NULL + }; + + RegisterClassEx(&wc); + + _hWnd = CreateWindow(WINDOW_TITLE, WINDOW_TITLE, WS_OVERLAPPEDWINDOW, 0, 0, + WINDOW_WIDTH, WINDOW_HEIGHT, NULL, NULL, wc.hInstance, + NULL); + + if (!_hWnd) + { + log_error("Failed to create window"); + return; + } + + ShowWindow(_hWnd, SW_SHOWDEFAULT); + UpdateWindow(_hWnd); +#endif +} + +void CDeviceWrapper::WindowDestroy() +{ +#if defined(_WIN32) + if (_hWnd) DestroyWindow(_hWnd); + _hWnd = NULL; +#endif +} + +#if defined(_WIN32) +HWND CDeviceWrapper::WindowHandle() const { return _hWnd; } +#endif + +int CDeviceWrapper::WindowWidth() const { return WINDOW_WIDTH; } + +int CDeviceWrapper::WindowHeight() const { return WINDOW_HEIGHT; } + +CDeviceWrapper::TAccelerationType CDeviceWrapper::AccelerationType() +{ + return accelerationType; +} + +void 
CDeviceWrapper::AccelerationType(TAccelerationType accelerationTypeNew) +{ + accelerationType = accelerationTypeNew; +} + +CDeviceWrapper::~CDeviceWrapper() { WindowDestroy(); } + +#if defined(_WIN32) +CD3D9Wrapper::CD3D9Wrapper() + : _d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), + _adapterFound(false) +{ + WindowInit(); + + _d3d9 = Direct3DCreate9(D3D_SDK_VERSION); + if (!_d3d9) + { + log_error("Direct3DCreate9 failed\n"); + _status = DEVICE_FAIL; + } +} + +CD3D9Wrapper::~CD3D9Wrapper() +{ + Destroy(); + + if (_d3d9) _d3d9->Release(); + _d3d9 = 0; +} + +void CD3D9Wrapper::Destroy() +{ + if (_d3dDevice) _d3dDevice->Release(); + _d3dDevice = 0; +} + +cl_int CD3D9Wrapper::Init() +{ + if (!WindowHandle()) + { + log_error("D3D9: Window is not created\n"); + _status = DEVICE_FAIL; + return DEVICE_FAIL; + } + + if (!_d3d9 || DEVICE_PASS != _status || !_adapterFound) return false; + + _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm); + + D3DPRESENT_PARAMETERS d3dParams; + ZeroMemory(&d3dParams, sizeof(d3dParams)); + + d3dParams.Windowed = TRUE; + d3dParams.BackBufferCount = 1; + d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD; + d3dParams.hDeviceWindow = WindowHandle(); + d3dParams.BackBufferWidth = WindowWidth(); + d3dParams.BackBufferHeight = WindowHeight(); + d3dParams.BackBufferFormat = _d3ddm.Format; + + DWORD processingType = (AccelerationType() == ACCELERATION_HW) + ? 
D3DCREATE_HARDWARE_VERTEXPROCESSING + : D3DCREATE_SOFTWARE_VERTEXPROCESSING; + + if (FAILED(_d3d9->CreateDevice(_adapterIdx - 1, D3DDEVTYPE_HAL, + WindowHandle(), processingType, &d3dParams, + &_d3dDevice))) + { + log_error("CreateDevice failed\n"); + _status = DEVICE_FAIL; + return DEVICE_FAIL; + } + + _d3dDevice->BeginScene(); + _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); + _d3dDevice->EndScene(); + + return true; +} + +void *CD3D9Wrapper::D3D() const { return _d3d9; } + +void *CD3D9Wrapper::Device() const { return _d3dDevice; } + +D3DFORMAT CD3D9Wrapper::Format() { return _d3ddm.Format; } + +D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() { return _adapter; } + +TDeviceStatus CD3D9Wrapper::Status() const { return _status; } + +bool CD3D9Wrapper::AdapterNext() +{ + if (DEVICE_PASS != _status) return false; + + _adapterFound = false; + for (; _adapterIdx < _d3d9->GetAdapterCount();) + { + ++_adapterIdx; + D3DCAPS9 caps; + if (FAILED( + _d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) + continue; + + if (FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) + { + log_error("D3D9: GetAdapterIdentifier failed\n"); + _status = DEVICE_FAIL; + return false; + } + + _adapterFound = true; + + Destroy(); + if (!Init()) + { + _status = DEVICE_FAIL; + _adapterFound = false; + } + break; + } + + return _adapterFound; +} + +unsigned int CD3D9Wrapper::AdapterIdx() const { return _adapterIdx - 1; } + + +CD3D9ExWrapper::CD3D9ExWrapper() + : _d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), + _adapterFound(false) +{ + WindowInit(); + + HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex); + if (FAILED(result) || !_d3d9Ex) + { + log_error("Direct3DCreate9Ex failed\n"); + _status = DEVICE_FAIL; + } +} + +CD3D9ExWrapper::~CD3D9ExWrapper() +{ + Destroy(); + + if (_d3d9Ex) _d3d9Ex->Release(); + _d3d9Ex = 0; +} + +void *CD3D9ExWrapper::D3D() const { return _d3d9Ex; } + +void *CD3D9ExWrapper::Device() const { return 
_d3dDeviceEx; } + +D3DFORMAT CD3D9ExWrapper::Format() { return _d3ddmEx.Format; } + +D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() { return _adapter; } + +cl_int CD3D9ExWrapper::Init() +{ + if (!WindowHandle()) + { + log_error("D3D9EX: Window is not created\n"); + _status = DEVICE_FAIL; + return DEVICE_FAIL; + } + + if (!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound) + return DEVICE_FAIL; + + RECT rect; + GetClientRect(WindowHandle(), &rect); + + D3DPRESENT_PARAMETERS d3dParams; + ZeroMemory(&d3dParams, sizeof(d3dParams)); + + d3dParams.Windowed = TRUE; + d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP; + d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8; + d3dParams.BackBufferWidth = WindowWidth(); + d3dParams.BackBufferHeight = WindowHeight(); + + d3dParams.BackBufferCount = 1; + d3dParams.hDeviceWindow = WindowHandle(); + + DWORD processingType = (AccelerationType() == ACCELERATION_HW) + ? D3DCREATE_HARDWARE_VERTEXPROCESSING + : D3DCREATE_SOFTWARE_VERTEXPROCESSING; + + if (FAILED(_d3d9Ex->CreateDeviceEx(_adapterIdx - 1, D3DDEVTYPE_HAL, + WindowHandle(), processingType, + &d3dParams, NULL, &_d3dDeviceEx))) + { + log_error("CreateDeviceEx failed\n"); + _status = DEVICE_FAIL; + return DEVICE_FAIL; + } + + _d3dDeviceEx->BeginScene(); + _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); + _d3dDeviceEx->EndScene(); + + return DEVICE_PASS; +} + +void CD3D9ExWrapper::Destroy() +{ + if (_d3dDeviceEx) _d3dDeviceEx->Release(); + _d3dDeviceEx = 0; +} + +TDeviceStatus CD3D9ExWrapper::Status() const { return _status; } + +bool CD3D9ExWrapper::AdapterNext() +{ + if (DEVICE_FAIL == _status) return false; + + _adapterFound = false; + for (; _adapterIdx < _d3d9Ex->GetAdapterCount();) + { + ++_adapterIdx; + D3DCAPS9 caps; + if (FAILED( + _d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) + continue; + + if (FAILED( + _d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) + { + log_error("D3D9EX: GetAdapterIdentifier failed\n"); + _status = DEVICE_FAIL; 
+ return false; + } + + _adapterFound = true; + Destroy(); + if (!Init()) + { + _status = DEVICE_FAIL; + _adapterFound = _status; + } + + break; + } + + return _adapterFound; +} + +unsigned int CD3D9ExWrapper::AdapterIdx() const { return _adapterIdx - 1; } + +CDXVAWrapper::CDXVAWrapper() + : _dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false) +{ + _status = _d3d9.Status(); +} + +CDXVAWrapper::~CDXVAWrapper() { DXVAHDDestroy(); } + +void *CDXVAWrapper::Device() const { return _dxvaDevice; } + +TDeviceStatus CDXVAWrapper::Status() const +{ + if (_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL) + return DEVICE_FAIL; + else if (_status == DEVICE_NOTSUPPORTED + || _d3d9.Status() == DEVICE_NOTSUPPORTED) + return DEVICE_NOTSUPPORTED; + else + return DEVICE_PASS; +} + +bool CDXVAWrapper::AdapterNext() +{ + if (DEVICE_PASS != _status) return false; + + _adapterFound = _d3d9.AdapterNext(); + _status = _d3d9.Status(); + if (DEVICE_PASS != _status) + { + _adapterFound = false; + return false; + } + + if (!_adapterFound) return false; + + DXVAHDDestroy(); + _status = DXVAHDInit(); + if (DEVICE_PASS != _status) + { + _adapterFound = false; + return false; + } + + return true; +} + +TDeviceStatus CDXVAWrapper::DXVAHDInit() +{ + if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL) + || !_adapterFound) + return DEVICE_FAIL; + + DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 }; + + DXVAHD_CONTENT_DESC desc; + desc.InputFrameFormat = DXVAHD_FRAME_FORMAT_PROGRESSIVE; + desc.InputFrameRate = fps; + desc.InputWidth = WindowWidth(); + desc.InputHeight = WindowHeight(); + desc.OutputFrameRate = fps; + desc.OutputWidth = WindowWidth(); + desc.OutputHeight = WindowHeight(); + +#ifdef USE_SOFTWARE_PLUGIN + _status = DEVICE_FAIL; + return DEVICE_FAIL; +#endif + + HRESULT hr = DXVAHD_CreateDevice( + static_cast(_d3d9.Device()), &desc, + DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice); + if (FAILED(hr)) + { + if (hr == E_NOINTERFACE) + { + log_error( + 
"DXVAHD_CreateDevice skipped due to no supported devices!\n"); + _status = DEVICE_NOTSUPPORTED; + } + else + { + log_error("DXVAHD_CreateDevice failed\n"); + _status = DEVICE_FAIL; + } + } + + return _status; +} + +void CDXVAWrapper::DXVAHDDestroy() +{ + if (_dxvaDevice) _dxvaDevice->Release(); + _dxvaDevice = 0; +} + +void *CDXVAWrapper::D3D() const { return _d3d9.D3D(); } + +unsigned int CDXVAWrapper::AdapterIdx() const { return _d3d9.AdapterIdx(); } + +const CD3D9ExWrapper &CDXVAWrapper::D3D9() const { return _d3d9; } + +CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(): mMem(NULL) {} + +CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(IDirect3DSurface9 *mem): mMem(mem) {} + +CD3D9SurfaceWrapper::~CD3D9SurfaceWrapper() +{ + if (mMem != NULL) mMem->Release(); + mMem = NULL; +} + +#endif + +CSurfaceWrapper::CSurfaceWrapper() {} + +CSurfaceWrapper::~CSurfaceWrapper() {} diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h new file mode 100644 index 0000000000..e3a7c6d818 --- /dev/null +++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h @@ -0,0 +1,195 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __WRAPPERS_H +#define __WRAPPERS_H + +#if defined(_WIN32) +#include +#if defined(__MINGW32__) +#include +typedef unsigned char UINT8; +#define __out +#define __in +#define __inout +#define __out_bcount(size) +#define __out_bcount_opt(size) +#define __in_opt +#define __in_ecount(size) +#define __in_ecount_opt(size) +#define __out_opt +#define __out_ecount(size) +#define __out_ecount_opt(size) +#define __in_bcount_opt(size) +#define __inout_opt +#define __inout_bcount(size) +#define __in_bcount(size) +#define __deref_out +#endif +#include +#include +#endif + +enum TDeviceStatus +{ + DEVICE_NOTSUPPORTED, + DEVICE_PASS, + DEVICE_FAIL, +}; + +class CDeviceWrapper { +public: + enum TAccelerationType + { + ACCELERATION_HW, + ACCELERATION_SW, + }; + + CDeviceWrapper(); + virtual ~CDeviceWrapper(); + + virtual bool AdapterNext() = 0; + virtual unsigned int AdapterIdx() const = 0; + virtual void *Device() const = 0; + virtual TDeviceStatus Status() const = 0; + virtual void *D3D() const = 0; + +#if defined(_WIN32) + HWND WindowHandle() const; +#endif + int WindowWidth() const; + int WindowHeight() const; + void WindowInit(); + + + static TAccelerationType AccelerationType(); + static void AccelerationType(TAccelerationType accelerationTypeNew); + +private: + static LPCTSTR WINDOW_TITLE; + static const int WINDOW_WIDTH; + static const int WINDOW_HEIGHT; + static TAccelerationType accelerationType; + +#if defined(_WIN32) + HMODULE _hInstance; + HWND _hWnd; +#endif + + void WindowDestroy(); +}; + +class CSurfaceWrapper { +public: + CSurfaceWrapper(); + virtual ~CSurfaceWrapper(); +}; + +#if defined(_WIN32) +// windows specific wrappers +class CD3D9Wrapper : public CDeviceWrapper { +public: + CD3D9Wrapper(); + ~CD3D9Wrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual TDeviceStatus Status() const; + virtual void *D3D() const; + +private: + LPDIRECT3D9 _d3d9; + LPDIRECT3DDEVICE9 _d3dDevice; + 
D3DDISPLAYMODE _d3ddm; + D3DADAPTER_IDENTIFIER9 _adapter; + TDeviceStatus _status; + unsigned int _adapterIdx; + bool _adapterFound; + + D3DFORMAT Format(); + D3DADAPTER_IDENTIFIER9 Adapter(); + int Init(); + void Destroy(); +}; + +class CD3D9ExWrapper : public CDeviceWrapper { +public: + CD3D9ExWrapper(); + ~CD3D9ExWrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual TDeviceStatus Status() const; + virtual void *D3D() const; + +private: + LPDIRECT3D9EX _d3d9Ex; + LPDIRECT3DDEVICE9EX _d3dDeviceEx; + D3DDISPLAYMODEEX _d3ddmEx; + D3DADAPTER_IDENTIFIER9 _adapter; + TDeviceStatus _status; + unsigned int _adapterIdx; + bool _adapterFound; + + D3DFORMAT Format(); + D3DADAPTER_IDENTIFIER9 Adapter(); + int Init(); + void Destroy(); +}; + +class CDXVAWrapper : public CDeviceWrapper { +public: + CDXVAWrapper(); + ~CDXVAWrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual TDeviceStatus Status() const; + virtual void *D3D() const; + const CD3D9ExWrapper &D3D9() const; + +private: + CD3D9ExWrapper _d3d9; + IDXVAHD_Device *_dxvaDevice; + TDeviceStatus _status; + bool _adapterFound; + + static const D3DFORMAT RENDER_TARGET_FORMAT; + static const D3DFORMAT VIDEO_FORMAT; + static const unsigned int VIDEO_FPS; + + TDeviceStatus DXVAHDInit(); + void DXVAHDDestroy(); +}; + +class CD3D9SurfaceWrapper : public CSurfaceWrapper { +public: + CD3D9SurfaceWrapper(); + CD3D9SurfaceWrapper(IDirect3DSurface9 *mem); + ~CD3D9SurfaceWrapper(); + + operator IDirect3DSurface9 *() { return mMem; } + IDirect3DSurface9 **operator&() { return &mMem; } + IDirect3DSurface9 *operator->() const { return mMem; } + +private: + IDirect3DSurface9 *mMem; +}; +#endif + +#endif // __D3D_WRAPPERS diff --git a/test_conformance/generic_address_space/basic_tests.cpp b/test_conformance/generic_address_space/basic_tests.cpp index 0b81564dab..b2e745c0fe 100644 --- 
a/test_conformance/generic_address_space/basic_tests.cpp +++ b/test_conformance/generic_address_space/basic_tests.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -39,7 +39,9 @@ class CBasicTest : CTest { const char *srcPtr = src.c_str(); - if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) { + if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr, + "testKernel")) + { log_error("create_single_kernel_helper failed"); return -1; } diff --git a/test_conformance/generic_address_space/stress_tests.cpp b/test_conformance/generic_address_space/stress_tests.cpp index 4f94a5d098..7193e69236 100644 --- a/test_conformance/generic_address_space/stress_tests.cpp +++ b/test_conformance/generic_address_space/stress_tests.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -41,7 +41,9 @@ class CStressTest : public CTest { const char *srcPtr = src.c_str(); - if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) { + if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr, + "testKernel")) + { log_error("create_single_kernel_helper failed"); return -1; } diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp index f11590fb91..c61610d090 100644 --- a/test_conformance/gl/test_buffers.cpp +++ b/test_conformance/gl/test_buffers.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,126 +17,126 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -#if !defined (__APPLE__) - #include +#if !defined(__APPLE__) +#include #endif static const char *bufferKernelPattern = -"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" -"{\n" -" int tid = get_global_id(0);\n" -" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" -" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" -"}\n"; - -#define TYPE_CASE( enum, type, range, offset ) \ - case enum: \ - { \ - cl_##type *ptr = (cl_##type *)outData; \ - for( i = 0; i < count; i++ ) \ - ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \ - break; \ + "__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, " + "__global %s%s *glDest )\n" + "{\n" + " int tid = get_global_id(0);\n" + " clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" + " glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" + "}\n"; + +#define TYPE_CASE(enum, type, range, offset) \ + case enum: { \ + cl_##type *ptr = (cl_##type *)outData; \ + for (i = 0; i < count; i++) \ + ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \ + break; \ } -void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData ) +void gen_input_data(ExplicitType type, size_t count, MTdata d, void *outData) { size_t i; - switch( type ) + switch (type) { - case kBool: - { + case kBool: { bool *boolPtr = (bool *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - boolPtr[i] = ( genrand_int32(d) & 1 ) ? true : false; + boolPtr[i] = (genrand_int32(d) & 1) ? 
true : false; } break; } - TYPE_CASE( kChar, char, 250, 127 ) - TYPE_CASE( kUChar, uchar, 250, 0 ) - TYPE_CASE( kShort, short, 65530, 32767 ) - TYPE_CASE( kUShort, ushort, 65530, 0 ) - TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 ) - TYPE_CASE( kUInt, uint, 0x0fffffff, 0 ) + TYPE_CASE(kChar, char, 250, 127) + TYPE_CASE(kUChar, uchar, 250, 0) + TYPE_CASE(kShort, short, 65530, 32767) + TYPE_CASE(kUShort, ushort, 65530, 0) + TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000) + TYPE_CASE(kUInt, uint, 0x0fffffff, 0) - case kLong: - { + case kLong: { cl_long *longPtr = (cl_long *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + longPtr[i] = (cl_long)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kULong: - { + case kULong: { cl_ulong *ulongPtr = (cl_ulong *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + ulongPtr[i] = (cl_ulong)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kFloat: - { + case kFloat: { cl_float *floatPtr = (float *)outData; - for( i = 0; i < count; i++ ) - floatPtr[i] = get_random_float( -100000.f, 100000.f, d ); + for (i = 0; i < count; i++) + floatPtr[i] = get_random_float(-100000.f, 100000.f, d); break; } default: - log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + log_error( + "ERROR: Invalid type passed in to generate_random_data!\n"); break; } } -#define INC_CASE( enum, type ) \ - case enum: \ - { \ - cl_##type *src = (cl_##type *)inData; \ - cl_##type *dst = (cl_##type *)outData; \ - *dst = *src + 1; \ - break; \ +#define INC_CASE(enum, type) \ + case enum: { \ + cl_##type *src = (cl_##type *)inData; \ + cl_##type *dst = (cl_##type *)outData; \ + *dst = *src + 1; \ + break; \ } -void get_incremented_value( void *inData, void *outData, ExplicitType type ) +void 
get_incremented_value(void *inData, void *outData, ExplicitType type) { - switch( type ) + switch (type) { - INC_CASE( kChar, char ) - INC_CASE( kUChar, uchar ) - INC_CASE( kShort, short ) - INC_CASE( kUShort, ushort ) - INC_CASE( kInt, int ) - INC_CASE( kUInt, uint ) - INC_CASE( kLong, long ) - INC_CASE( kULong, ulong ) - INC_CASE( kFloat, float ) - default: - break; + INC_CASE(kChar, char) + INC_CASE(kUChar, uchar) + INC_CASE(kShort, short) + INC_CASE(kUShort, ushort) + INC_CASE(kInt, int) + INC_CASE(kUInt, uint) + INC_CASE(kLong, long) + INC_CASE(kULong, ulong) + INC_CASE(kFloat, float) + default: break; } } -int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) +int test_buffer_kernel(cl_context context, cl_command_queue queue, + ExplicitType vecType, size_t vecSize, int numElements, + int validate_only, MTdata d) { clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 3 ]; + clMemWrapper streams[3]; size_t dataSize = numElements * 16 * sizeof(cl_long); #if !(defined(_WIN32) && defined(_MSC_VER)) - cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; + cl_long inData[numElements * 16], outDataCL[numElements * 16], + outDataGL[numElements * 16]; #else - cl_long* inData = (cl_long*)_malloca(dataSize); - cl_long* outDataCL = (cl_long*)_malloca(dataSize); - cl_long* outDataGL = (cl_long*)_malloca(dataSize); + cl_long *inData = (cl_long *)_malloca(dataSize); + cl_long *outDataCL = (cl_long *)_malloca(dataSize); + cl_long *outDataGL = (cl_long *)_malloca(dataSize); #endif glBufferWrapper inGLBuffer, outGLBuffer; - int i; + int i; size_t bufferSize; int error; @@ -146,210 +146,259 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType char sizeName[4]; /* Create the source */ - if( vecSize == 1 ) - sizeName[ 0 ] = 0; + if (vecSize == 1) + sizeName[0] = 0; else - sprintf( sizeName, 
"%d", (int)vecSize ); + sprintf(sizeName, "%d", (int)vecSize); - sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName ); + sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType), + sizeName, get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName); /* Create kernels */ programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test")) { return -1; } - bufferSize = numElements * vecSize * get_explicit_type_size( vecType ); + bufferSize = numElements * vecSize * get_explicit_type_size(vecType); /* Generate some almost-random input data */ - gen_input_data( vecType, vecSize * numElements, d, inData ); - memset( outDataCL, 0, dataSize ); - memset( outDataGL, 0, dataSize ); + gen_input_data(vecType, vecSize * numElements, d, inData); + memset(outDataCL, 0, dataSize); + memset(outDataGL, 0, dataSize); /* Generate some GL buffers to go against */ - glGenBuffers( 1, &inGLBuffer ); - glGenBuffers( 1, &outGLBuffer ); + glGenBuffers(1, &inGLBuffer); + glGenBuffers(1, &outGLBuffer); - glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW); - // Note: we need to bind the output buffer, even though we don't care about its values yet, - // because CL needs it to get the buffer size - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, 
outDataGL, GL_STATIC_DRAW ); + // Note: we need to bind the output buffer, even though we don't care about + // its values yet, because CL needs it to get the buffer size + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); - glFlush(); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glFinish(); - /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */ - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); - test_error( error, "Unable to create input GL buffer" ); + /* Generate some streams. The first and last ones are GL, middle one just + * vanilla CL */ + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY, + inGLBuffer, &error); + test_error(error, "Unable to create input GL buffer"); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); - test_error( error, "Unable to create output CL buffer" ); + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + test_error(error, "Unable to create output CL buffer"); - streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); - test_error( error, "Unable to create output GL buffer" ); + streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY, + outGLBuffer, &error); + test_error(error, "Unable to create output GL buffer"); - /* Validate the info */ - if (validate_only) { - int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | - CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + /* Validate the info */ + if (validate_only) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; - } + int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, + (GLuint)inGLBuffer, (GLenum)0, 0) + | CheckGLObjectInfo(streams[2], 
CL_GL_OBJECT_BUFFER, + (GLuint)outGLBuffer, (GLenum)0, 0)); + for (i = 0; i < 3; i++) + { + streams[i].reset(); + } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; - return result; - } + return result; + } /* Assign streams and execute */ - for( int i = 0; i < 3; i++ ) + for (int i = 0; i < 3; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); /* Run the kernel */ threads[0] = numElements; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - - // Get the results from both CL and GL and make sure everything looks 
correct - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); - test_error( error, "Unable to read output CL array!" ); - - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY ); - memcpy( outDataGL, glMem, bufferSize ); - glUnmapBuffer( GL_ARRAY_BUFFER ); - - char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Get the results from both CL and GL and make sure everything looks + // correct + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize, + outDataCL, 0, NULL, NULL); + test_error(error, "Unable to read output CL array!"); + + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY); + memcpy(outDataGL, glMem, bufferSize); + glUnmapBuffer(GL_ARRAY_BUFFER); + + char *inP = (char *)inData, *glP = (char *)outDataGL, + *clP = (char *)outDataCL; error = 0; - for( size_t i = 0; i < numElements * vecSize; i++ ) + for (size_t i = 0; i < numElements * vecSize; i++) { cl_long expectedCLValue, expectedGLValue; - get_incremented_value( inP, &expectedCLValue, vecType ); - get_incremented_value( &expectedCLValue, &expectedGLValue, vecType ); + get_incremented_value(inP, &expectedCLValue, vecType); + get_incremented_value(&expectedCLValue, &expectedGLValue, vecType); - if( memcmp( clP, 
&expectedCLValue, get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the CL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedCLValue, + get_explicit_type_size(vecType), 1, + scratch)); + log_error("\t Actual: %s\n", + GetDataVectorString(clP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the GL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedGLValue, + get_explicit_type_size(vecType), 1, + scratch)); + 
log_error("\t Actual: %s\n", + GetDataVectorString(glP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( error ) - return error; + if (error) return error; - inP += get_explicit_type_size( vecType ); - glP += get_explicit_type_size( vecType ); - clP += get_explicit_type_size( vecType ); + inP += get_explicit_type_size(vecType); + glP += get_explicit_type_size(vecType); + clP += get_explicit_type_size(vecType); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; return 0; } -int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 0, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } -int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers_getinfo(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; - RandomSeed seed( gRandomSeed ); + RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 1, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } - - - diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp index 00bf2cc90b..35cc62de62 100644 --- a/test_conformance/gl/test_fence_sync.cpp +++ b/test_conformance/gl/test_fence_sync.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -17,7 +17,7 @@ #include "gl/setup.h" #include "harness/genericThread.h" -#if defined( __APPLE__ ) +#if defined(__APPLE__) #include #else #include @@ -40,112 +40,121 @@ typedef struct __GLsync *GLsync; #define APIENTRY #endif -typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags); +typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags); glFenceSyncPtr glFenceSyncFunc; -typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync); +typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync); glIsSyncPtr glIsSyncFunc; -typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync); +typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync); glDeleteSyncPtr glDeleteSyncFunc; -typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glClientWaitSyncPtr glClientWaitSyncFunc; -typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glWaitSyncPtr glWaitSyncFunc; -typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); +typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); glGetInteger64vPtr glGetInteger64vFunc; -typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, - GLint *values); +typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname, + GLsizei bufSize, GLsizei *length, + GLint *values); glGetSyncivPtr glGetSyncivFunc; #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) -static void InitSyncFns( void ) +static void InitSyncFns(void) { - glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); - glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); - glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); - 
glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); - glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); - glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); - glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" ); + glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync"); + glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync"); + glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync"); + glClientWaitSyncFunc = + (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync"); + glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync"); + glGetInteger64vFunc = + (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v"); + glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv"); } #ifndef GL_ARB_sync -#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 -#define GL_OBJECT_TYPE 0x9112 -#define GL_SYNC_CONDITION 0x9113 -#define GL_SYNC_STATUS 0x9114 -#define GL_SYNC_FLAGS 0x9115 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 -#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_FENCE 0x9116 -#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 -#define GL_UNSIGNALED 0x9118 -#define GL_SIGNALED 0x9119 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 -#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 -#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull -#define GL_ALREADY_SIGNALED 0x911A -#define GL_TIMEOUT_EXPIRED 0x911B -#define GL_CONDITION_SATISFIED 0x911C -#define GL_WAIT_FAILED 0x911D +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D #endif #define USING_ARB_sync 1 #endif -typedef cl_event 
(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; +typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( + cl_context context, GLsync sync, cl_int *errCode_ret); clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; static const char *updateBuffersKernel[] = { - "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" + "__kernel void update( __global float4 * vertices, __global float4 " + "*colors, int horizWrap, int rowIdx )\n" "{\n" " size_t tid = get_global_id(0);\n" "\n" " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" - " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" + " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, " + "1.f );\n" "\n" " int rowV = rowIdx + 1;\n" - " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" - " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" + " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 " + ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" + " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, " + "1.0f, 1.0f, 1.0f );\n" " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" - "}\n" }; - -//Passthrough VertexShader -static const char *vertexshader = -"#version 150\n" -"uniform mat4 projMatrix;\n" -"in vec4 inPosition;\n" -"in vec4 inColor;\n" -"out vec4 vertColor;\n" -"void main (void) {\n" -" gl_Position = projMatrix*inPosition;\n" -" vertColor = inColor;\n" -"}\n"; - -//Passthrough FragmentShader -static const char *fragmentshader = -"#version 150\n" -"in vec4 vertColor;\n" -"out vec4 outColor;\n" -"void main (void) {\n" -" outColor = vertColor;\n" -"}\n"; + "}\n" +}; + +// Passthrough VertexShader +static const char 
*vertexshader = "#version 150\n" + "uniform mat4 projMatrix;\n" + "in vec4 inPosition;\n" + "in vec4 inColor;\n" + "out vec4 vertColor;\n" + "void main (void) {\n" + " gl_Position = projMatrix*inPosition;\n" + " vertColor = inColor;\n" + "}\n"; + +// Passthrough FragmentShader +static const char *fragmentshader = "#version 150\n" + "in vec4 vertColor;\n" + "out vec4 outColor;\n" + "void main (void) {\n" + " outColor = vertColor;\n" + "}\n"; GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) { - GLint logLength, status; + GLint logLength, status; GLuint program = glCreateProgram(); GLuint vpShader; @@ -153,8 +162,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL); glCompileShader(vpShader); glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*) malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(vpShader, logLength, &logLength, log); log_info("Vtx Shader compile log:\n%s", log); free(log); @@ -175,8 +185,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glCompileShader(fpShader); glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(fpShader, logLength, &logLength, log); log_info("Frag Shader compile log:\n%s", log); free(log); @@ -192,8 +203,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glLinkProgram(program); glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(program, logLength, &logLength, log); log_info("Program link log:\n%s", log); free(log); @@ -219,7 +231,7 @@ void destroyShaderProgram(GLuint program) glUseProgram(0); 
glGetAttachedShaders(program, 2, &count, shaders); int i; - for(i = 0; i < count; i++) + for (i = 0; i < count; i++) { glDetachShader(program, shaders[i]); glDeleteShader(shaders[i]); @@ -227,44 +239,49 @@ void destroyShaderProgram(GLuint program) glDeleteProgram(program); } -// This function queues up and runs the above CL kernel that writes the vertex data -cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, - cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) +// This function queues up and runs the above CL kernel that writes the vertex +// data +cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, cl_int rowIdx, cl_event fenceEvent, + size_t numThreads) { - cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); - test_error( error, "Unable to set kernel arguments" ); + cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx); + test_error(error, "Unable to set kernel arguments"); clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; - int numEvents = ( fenceEvent != NULL ) ? 1 : 0; - cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL; + int numEvents = (fenceEvent != NULL) ? 1 : 0; + cl_event *fence_evt = (fenceEvent != NULL) ? 
&fenceEvent : NULL; - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); - test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents, + fence_evt, &acqEvent1); + test_error(error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents, + fence_evt, &acqEvent2); + test_error(error, "Unable to acquire GL obejcts"); - cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; + cl_event evts[2] = { acqEvent1, acqEvent2 }; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2, + evts, &kernEvent); + test_error(error, "Unable to execute test kernel"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent, + &relEvent1); test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent, + &relEvent2); test_error(error, "clEnqueueReleaseGLObjects failed"); - evts[ 0 ] = relEvent1; - evts[ 1 ] = relEvent2; - error = clWaitForEvents( 2, evts ); - test_error( error, "Unable to wait for release events" ); + evts[0] = relEvent1; + evts[1] = relEvent2; + error = clWaitForEvents(2, evts); + test_error(error, "Unable to wait for release events"); return 0; } -class RunThread : public genericThread -{ +class RunThread : public genericThread { public: - cl_kernel mKernel; cl_command_queue mQueue; cl_mem mStream0, mStream1; @@ -272,34 
+289,40 @@ class RunThread : public genericThread cl_event mFenceEvent; size_t mNumThreads; - RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) - : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) - { - } + RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, size_t numThreads) + : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1), + mNumThreads(numThreads) + {} - void SetRunData( cl_int rowIdx, cl_event fenceEvent ) + void SetRunData(cl_int rowIdx, cl_event fenceEvent) { mRowIdx = rowIdx; mFenceEvent = fenceEvent; } - virtual void * IRun( void ) + virtual void *IRun(void) { - cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); + cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1, + mRowIdx, mFenceEvent, mNumThreads); return (void *)(uintptr_t)error; } }; -int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) +int test_fence_sync_single(cl_device_id device, cl_context context, + cl_command_queue queue, bool separateThreads, + GLint rend_vs, GLint read_vs, + cl_device_id rend_device) { int error; const int framebufferSize = 512; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } @@ -312,10 +335,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clGetPlatformIDs(0, NULL, &nplatforms); clGetPlatformIDs(1, &platform, NULL); - if (nplatforms > 1) { + if (nplatforms > 1) + { log_info("clGetPlatformIDs 
returned multiple values. This is not " - "an error, but might result in obtaining incorrect function " - "pointers if you do not want the first returned platform.\n"); + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); // Show them the platform name, in case it is a problem. @@ -323,28 +347,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ char *name; clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); - name = (char*)malloc(size); + name = (char *)malloc(size); clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); log_info("Using platform with name: %s \n", name); free(name); } - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR"); - if( clCreateEventFromGLsyncKHR_ptr == NULL ) + clCreateEventFromGLsyncKHR_ptr = + (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncKHR"); + if (clCreateEventFromGLsyncKHR_ptr == NULL) { - log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE"); + log_error("ERROR: Unable to run fence_sync test " + "(clCreateEventFromGLsyncKHR function not discovered!)\n"); + clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncAPPLE"); return -1; } #ifdef USING_ARB_sync - char *gl_version_str = (char*)glGetString( GL_VERSION ); + char *gl_version_str = (char *)glGetString(GL_VERSION); float glCoreVersion; sscanf(gl_version_str, "%f", &glCoreVersion); - if( glCoreVersion < 3.0f ) + if (glCoreVersion < 3.0f) { - log_info( "OpenGL version %f does not support fence/sync! 
Skipping test.\n", glCoreVersion ); + log_info( + "OpenGL version %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return 0; } @@ -354,10 +385,13 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint val, screen; CGLGetVirtualScreen(currCtx, &screen); CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); - if(val != kCGLOGLPVersion_3_2_Core) + if (val != kCGLOGLPVersion_3_2_Core) { - log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" - "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion ); + log_error( + "OpenGL context was not created with OpenGL version >= 3.0 profile " + "even though platform supports it" + "OpenGL profile %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return -1; } #else @@ -365,7 +399,7 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ HDC hdc = wglGetCurrentDC(); HGLRC hglrc = wglGetCurrentContext(); #else - Display* dpy = glXGetCurrentDisplay(); + Display *dpy = glXGetCurrentDisplay(); GLXDrawable drawable = glXGetCurrentDrawable(); GLXContext ctx = glXGetCurrentContext(); #endif @@ -386,51 +420,66 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint posLoc, colLoc; GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); - if(!shaderprogram) + if (!shaderprogram) { log_error("Failed to create shader program\n"); return -1; } - float l = 0.0f; float r = framebufferSize; - float b = 0.0f; float t = framebufferSize; - - float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, - 0.0f, 2.0f/(t-b), 0.0f, 0.0f, - 0.0f, 0.0f, -1.0f, 0.0f, - -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f - }; + float l = 0.0f; + float r = framebufferSize; + float b = 0.0f; + float t = framebufferSize; + + float projMatrix[16] = { 2.0f / (r - l), + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 2.0f / (t - b), + 0.0f, + 0.0f, + 0.0f, + 
0.0f, + -1.0f, + 0.0f, + -(r + l) / (r - l), + -(t + b) / (t - b), + 0.0f, + 1.0f }; glUseProgram(shaderprogram); GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); glUseProgram(0); - // Note: the framebuffer is just the target to verify our results against, so we don't - // really care to go through all the possible formats in this case + // Note: the framebuffer is just the target to verify our results against, + // so we don't really care to go through all the possible formats in this + // case glFramebufferWrapper glFramebuffer; glRenderbufferWrapper glRenderbuffer; - error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, - &glFramebuffer, &glRenderbuffer ); - if( error != 0 ) - return error; + error = CreateGLRenderbufferRaw( + framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA, + GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer); + if (error != 0) return error; GLuint vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glBufferWrapper vtxBuffer, colorBuffer; - glGenBuffers( 1, &vtxBuffer ); - glGenBuffers( 1, &colorBuffer ); + glGenBuffers(1, &vtxBuffer); + glGenBuffers(1, &colorBuffer); - const int numHorizVertices = ( framebufferSize * 64 ) + 1; + const int numHorizVertices = (framebufferSize * 64) + 1; - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); 
// Now that the requisite objects are bound, we can attempt program // validation: @@ -439,8 +488,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint logLength, status; glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(shaderprogram, logLength, &logLength, log); log_info("Program validate log:\n%s", log); free(log); @@ -455,125 +505,131 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 2 ]; + clMemWrapper streams[2]; - if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + updateBuffersKernel, "update")) return -1; - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL vertex buffer" ); + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + vtxBuffer, &error); + test_error(error, "Unable to create CL buffer from GL vertex buffer"); - streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL color buffer" ); + streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + colorBuffer, &error); + test_error(error, "Unable to create CL buffer from GL color buffer"); - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set 
kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel arguments"); cl_int horizWrap = (cl_int)framebufferSize; - error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap); + test_error(error, "Unable to set kernel arguments"); - glViewport( 0, 0, framebufferSize, framebufferSize ); - glClearColor( 0, 0, 0, 0 ); - glClear( GL_COLOR_BUFFER_BIT ); - glClear( GL_DEPTH_BUFFER_BIT ); - glDisable( GL_DEPTH_TEST ); - glEnable( GL_BLEND ); - glBlendFunc( GL_ONE, GL_ONE ); + glViewport(0, 0, framebufferSize, framebufferSize); + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glClear(GL_DEPTH_BUFFER_BIT); + glDisable(GL_DEPTH_TEST); + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE); clEventWrapper fenceEvent; GLsync glFence = 0; // Do a loop through 8 different horizontal stripes against the framebuffer - RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); + RunThread thread(kernel, queue, streams[0], streams[1], + (size_t)numHorizVertices); - for( int i = 0; i < 8; i++ ) + for (int i = 0; i < 8; i++) { // if current rendering device is not the compute device and // separateThreads == false which means compute is going on same // thread and we are using implicit synchronization (no GLSync obj used) - // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need - // to wait for rendering to finish on other device before CL can start - // writing to CL/GL shared mem objects. When separateThreads is true i.e. - // we are using GLSync obj to synchronize then we dont need to call glFinish - // here since CL should wait for rendering on other device before this - // GLSync object to finish before it starts writing to shared mem object. 
- // Also rend_device == compute_device no need to call glFinish - if(rend_device != device && !separateThreads) - glFinish(); - - if( separateThreads ) + // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we + // need to wait for rendering to finish on other device before CL can + // start writing to CL/GL shared mem objects. When separateThreads is + // true i.e. we are using GLSync obj to synchronize then we dont need to + // call glFinish here since CL should wait for rendering on other device + // before this GLSync object to finish before it starts writing to + // shared mem object. Also rend_device == compute_device no need to call + // glFinish + if (rend_device != device && !separateThreads) glFinish(); + + if (separateThreads) { - if (fenceEvent != NULL) - { - clReleaseEvent(fenceEvent); - glDeleteSyncFunc(glFence); - } + glDeleteSyncFunc(glFence); glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. 
+ // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); - thread.SetRunData( (cl_int)i, fenceEvent ); + thread.SetRunData((cl_int)i, fenceEvent); thread.Start(); error = (cl_int)(size_t)thread.Join(); } else { - error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); + error = + run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i, + fenceEvent, (size_t)numHorizVertices); } - test_error( error, "Unable to run CL kernel" ); + test_error(error, "Unable to run CL kernel"); glUseProgram(shaderprogram); glEnableVertexAttribArray(posLoc); glEnableVertexAttribArray(colLoc); - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, 0); - glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); + glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2); glDisableVertexAttribArray(posLoc); glDisableVertexAttribArray(colLoc); glUseProgram(0); - if( separateThreads ) + if (separateThreads) { - // If we're on the same thread, then we're testing implicit syncing, so we - // don't need the actual fence code - if( fenceEvent != NULL ) - { - clReleaseEvent( fenceEvent ); - glDeleteSyncFunc( glFence ); - } + // If we're on the same thread, then we're testing implicit syncing, + // so we don't need the actual fence code + glDeleteSyncFunc(glFence); + - glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); - fenceEvent = 
clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); - test_error( error, "Unable to create CL event from GL fence" ); + glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. + // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); } else glFinish(); } - if( glFence != 0 ) - // Don't need the final release for fenceEvent, because the wrapper will take care of that - glDeleteSyncFunc( glFence ); + if (glFence != 0) + // Don't need the final release for fenceEvent, because the wrapper will + // take care of that + glDeleteSyncFunc(glFence); #ifdef __APPLE__ CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); @@ -585,54 +641,62 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ #endif #endif // Grab the contents of the final framebuffer - BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, - GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, - framebufferSize, 128 ) ); - - // Check the contents now. We should end up with solid color bands 32 pixels high and the - // full width of the framebuffer, at values (128,128,128) due to the additive blending - for( int i = 0; i < 8; i++ ) + BufferOwningPtr resultData(ReadGLRenderbuffer( + glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128)); + + // Check the contents now. 
We should end up with solid color bands 32 pixels + // high and the full width of the framebuffer, at values (128,128,128) due + // to the additive blending + for (int i = 0; i < 8; i++) { - for( int y = 0; y < 4; y++ ) + for (int y = 0; y < 4; y++) { - // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass - cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; - - cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; - for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) + // Note: coverage will be double because the 63-0 triangle + // overwrites again at the end of the pass + cl_uchar valA = + (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize; + cl_uchar valB = + (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize; + cl_uchar valC = + (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize; + + cl_uchar *row = + (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4]; + for (int x = 0; x < (framebufferSize - 1) - 1; x++) { - if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || - ( row[ x * 4 + 2 ] != valC ) ) + if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB) + || (row[x * 4 + 2] != valC)) { - log_error( "ERROR: Output framebuffer did not validate!\n" ); - DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); - log_error( "RUNS:\n" ); + log_error("ERROR: Output framebuffer did not validate!\n"); + DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128, + resultData); + log_error("RUNS:\n"); uint32_t *p = (uint32_t *)(char *)resultData; size_t a = 0; - for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) + for (size_t t = 1; t < framebufferSize * framebufferSize; + t++) { - if( p[ a ] != 0 ) + if (p[a] != 0) { - if( p[ t ] == 0 ) + 
if (p[t] == 0) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } else { - if( p[ t ] != 0 ) + if (p[t] != 0) { a = t; } } - } return -1; } @@ -645,46 +709,56 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ return 0; } -int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_fence_sync(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { GLint vs_count = 0; cl_device_id *device_list = NULL; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } #ifdef __APPLE__ CGLContextObj ctx = CGLGetCurrentContext(); CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); - CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); + CGLError err = + CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); - device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); - clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); + device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count); + clGetGLContextInfoAPPLE(context, ctx, + CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, + sizeof(cl_device_id) * vs_count, device_list, NULL); #else - // Need 
platform specific way of getting devices from CL context to which OpenGL can render - // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES + // Need platform specific way of getting devices from CL context to which + // OpenGL can render If not available it can be replaced with + // clGetContextInfo with CL_CONTEXT_DEVICES size_t device_cb; - cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); - if( err != CL_SUCCESS ) + cl_int err = + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); + if (err != CL_SUCCESS) { - print_error( err, "Unable to get device count from context" ); - return -1; + print_error(err, "Unable to get device count from context"); + return -1; } vs_count = (GLint)device_cb / sizeof(cl_device_id); - if (vs_count < 1) { - log_error("No devices found.\n"); - return -1; + if (vs_count < 1) + { + log_error("No devices found.\n"); + return -1; } - device_list = (cl_device_id *) malloc(device_cb); - err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL); - if( err != CL_SUCCESS ) { - free(device_list); - print_error( err, "Unable to get device list from context" ); - return -1; + device_list = (cl_device_id *)malloc(device_cb); + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list, + NULL); + if (err != CL_SUCCESS) + { + free(device_list); + print_error(err, "Unable to get device list from context"); + return -1; } #endif @@ -695,30 +769,38 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q // Loop through all the devices capable to OpenGL rendering // and set them as current rendering target - for(rend_vs = 0; rend_vs < vs_count; rend_vs++) + for (rend_vs = 0; rend_vs < vs_count; rend_vs++) { // Loop through all the devices and set them as current // compute target - for(read_vs = 0; read_vs < vs_count; read_vs++) + for (read_vs = 0; read_vs < vs_count; read_vs++) { - cl_device_id 
rend_device = device_list[rend_vs], read_device = device_list[read_vs]; + cl_device_id rend_device = device_list[rend_vs], + read_device = device_list[read_vs]; char rend_name[200], read_name[200]; - clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); - clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); + clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), + rend_name, NULL); + clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), + read_name, NULL); - log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); - error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); + log_info("Rendering on: %s, read back on: %s\n", rend_name, + read_name); + error = test_fence_sync_single(device, context, queue, false, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Implicit syncing with GL sync events failed!\n\n"); else log_info("Implicit syncing Passed\n"); - error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); + error = test_fence_sync_single(device, context, queue, true, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Explicit syncing with GL sync events failed!\n\n"); else log_info("Explicit syncing Passed\n"); } diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp index afaa08fc9a..07f5b65e83 100644 --- a/test_conformance/gl/test_image_methods.cpp +++ b/test_conformance/gl/test_image_methods.cpp @@ -287,10 +287,10 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); 
test_error( error, "Unable to set kernel argument" ); - // Flush and Acquire. - glFlush(); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &image, 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); + // Finish and Acquire. + glFinish(); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &image, 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); size_t threads[1] = { 1 }, localThreads[1] = { 1 }; diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt index c76fe51248..4f4ba53216 100644 --- a/test_conformance/gles/CMakeLists.txt +++ b/test_conformance/gles/CMakeLists.txt @@ -18,3 +18,11 @@ set (${MODULE_NAME}_SOURCES list(APPEND CLConform_LIBRARIES EGL GLESv2) include(../CMakeCommon.txt) + +if(DEFINED USE_GLES3) + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE GLES3) +endif() +if(MSVC) + # Don't warn about using the portable "strdup" function. + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE _CRT_NONSTDC_NO_DEPRECATE) +endif() \ No newline at end of file diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp index fe0f8ca36f..95a12a667f 100644 --- a/test_conformance/gles/setup_egl.cpp +++ b/test_conformance/gles/setup_egl.cpp @@ -117,7 +117,8 @@ class EGLGLEnvironment : public GLEnvironment _platform, "clGetGLContextInfoKHR"); if (GetGLContextInfo == NULL) { - print_error(status, "clGetGLContextInfoKHR failed"); + log_error("ERROR: clGetGLContextInfoKHR failed! 
(%s:%d)\n", + __FILE__, __LINE__); return NULL; } @@ -128,7 +129,7 @@ class EGLGLEnvironment : public GLEnvironment return NULL; } dev_size /= sizeof(cl_device_id); - log_info("GL _context supports %d compute devices\n", dev_size); + log_info("GL _context supports %zu compute devices\n", dev_size); status = GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, diff --git a/test_conformance/gles/test_buffers.cpp b/test_conformance/gles/test_buffers.cpp index a2d67322a9..73711261a5 100644 --- a/test_conformance/gles/test_buffers.cpp +++ b/test_conformance/gles/test_buffers.cpp @@ -205,10 +205,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType if (validate_only) { int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; @@ -285,10 +285,9 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType clP += get_explicit_type_size( vecType ); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp index 75e9d358bf..968d969522 100644 --- a/test_conformance/gles/test_fence_sync.cpp +++ b/test_conformance/gles/test_fence_sync.cpp @@ -160,7 +160,7 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glAttachShader(program, vpShader); GLuint fpShader; - char* fpstr = (char*)malloc(strlen(fragmentshader)); + char *fpstr = (char *)malloc(sizeof(fragmentshader)); strcpy(fpstr, fragmentshader); fpShader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(fpShader, 1, (const GLchar 
**)&fpstr, NULL); @@ -570,10 +570,12 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ { if( p[ t ] == 0 ) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %zu to %zu (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } diff --git a/test_conformance/gles/test_images_2D.cpp b/test_conformance/gles/test_images_2D.cpp index c1a17fc8d8..f6554023f2 100644 --- a/test_conformance/gles/test_images_2D.cpp +++ b/test_conformance/gles/test_images_2D.cpp @@ -369,7 +369,9 @@ int test_images_read_cube( cl_device_id device, cl_context context, cl_command_q } +#ifdef __APPLE__ #pragma mark -------------------- Write tests ------------------------- +#endif int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage, diff --git a/test_conformance/gles/test_renderbuffer.cpp b/test_conformance/gles/test_renderbuffer.cpp index 20127aca8d..0f6d289b9c 100644 --- a/test_conformance/gles/test_renderbuffer.cpp +++ b/test_conformance/gles/test_renderbuffer.cpp @@ -197,7 +197,9 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_ } +#ifdef __APPLE__ #pragma mark -------------------- Write tests ------------------------- +#endif int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer ) diff --git a/test_conformance/half/Test_roundTrip.cpp b/test_conformance/half/Test_roundTrip.cpp index 69fc7e4184..1ab4093763 100644 --- a/test_conformance/half/Test_roundTrip.cpp +++ 
b/test_conformance/half/Test_roundTrip.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include + +#include + #include "cl_utils.h" #include "tests.h" #include "harness/testHarness.h" @@ -156,7 +159,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = (size_t)getBufferSize(device) / elementSize; //elementSize is a power of two uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half size_t stride = blockCount; @@ -168,7 +171,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = (uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp index 52867c25e7..e93540191d 100644 --- a/test_conformance/half/Test_vLoadHalf.cpp +++ b/test_conformance/half/Test_vLoadHalf.cpp @@ -17,6 +17,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -37,14 +40,12 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"}; int minVectorSize = kMinVectorSize; - // There is no aligned scalar vloada_half in CL 1.1 -#if ! defined( CL_VERSION_1_1 ) && ! 
defined(__APPLE__) - vlog("Note: testing vloada_half.\n"); - if (aligned && minVectorSize == 0) - minVectorSize = 1; -#endif - for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + // There is no aligned scalar vloada_half + if (aligned && minVectorSize == 0) minVectorSize = 1; + + for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; + vectorSize++) { int effectiveVectorSize = g_arrVecSizes[vectorSize]; @@ -81,7 +82,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) "{\n" " size_t i = get_global_id(0);\n" " f[i] = vloada_half3( i, p );\n" - " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n" + " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n" "}\n" }; @@ -431,7 +432,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = getBufferSize(device) / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half @@ -449,7 +450,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) for( i = 0; i < (uint64_t)lastCase; i += blockCount ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = (uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index c3a328ad64..85824a9fb9 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -18,6 +18,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -674,7 +677,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR } // end for vector size // Figure out how many 
elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats. size_t stride = blockCount; @@ -726,7 +729,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR for( i = 0; i < lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; @@ -1272,7 +1275,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; uint64_t lastCase = 1ULL << (8*sizeof(float)); size_t stride = blockCount; @@ -1323,7 +1326,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index d56ae7706c..bd935e7f34 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -105,23 +105,41 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr if ( *error != CL_SUCCESS ) { + long long unsigned imageSize = get_image_size_mb(imageInfo); switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D: - log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( *error ) ); + 
log_error("ERROR: Unable to create 1D image of size %d (%llu " + "MB):(%s)", + (int)imageInfo->width, imageSize, + IGetErrorString(*error)); break; case CL_MEM_OBJECT_IMAGE2D: - log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( *error ) ); + log_error("ERROR: Unable to create 2D image of size %d x %d " + "(%llu MB):(%s)", + (int)imageInfo->width, (int)imageInfo->height, + imageSize, IGetErrorString(*error)); break; case CL_MEM_OBJECT_IMAGE3D: - log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( *error ) ); + log_error("ERROR: Unable to create 3D image of size %d x %d x " + "%d (%llu MB):(%s)", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth, imageSize, + IGetErrorString(*error)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: - log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( *error ) ); + log_error("ERROR: Unable to create 1D image array of size %d x " + "%d (%llu MB):(%s)", + (int)imageInfo->width, (int)imageInfo->arraySize, + imageSize, IGetErrorString(*error)); break; break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( *error ) ); + log_error("ERROR: Unable to create 2D image array of size %d x " + "%d x %d (%llu MB):(%s)", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->arraySize, imageSize, + IGetErrorString(*error)); break; } log_error("ERROR: and %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels); @@ -266,7 +284,6 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr return img; } - // WARNING -- not thread safe BufferOwningPtr srcData; 
BufferOwningPtr dstData; @@ -291,24 +308,7 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d } else { - switch (srcImageInfo->type) - { - case CL_MEM_OBJECT_IMAGE1D: - srcBytes = srcImageInfo->rowPitch; - break; - case CL_MEM_OBJECT_IMAGE2D: - srcBytes = srcImageInfo->height * srcImageInfo->rowPitch; - break; - case CL_MEM_OBJECT_IMAGE3D: - srcBytes = srcImageInfo->depth * srcImageInfo->slicePitch; - break; - case CL_MEM_OBJECT_IMAGE1D_ARRAY: - srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch; - break; - case CL_MEM_OBJECT_IMAGE2D_ARRAY: - srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch; - break; - } + srcBytes = get_image_size(srcImageInfo); } if (srcBytes > srcData.getSize()) @@ -344,24 +344,7 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d } else { - switch (dstImageInfo->type) - { - case CL_MEM_OBJECT_IMAGE1D: - destImageSize = dstImageInfo->rowPitch; - break; - case CL_MEM_OBJECT_IMAGE2D: - destImageSize = dstImageInfo->height * dstImageInfo->rowPitch; - break; - case CL_MEM_OBJECT_IMAGE3D: - destImageSize = dstImageInfo->depth * dstImageInfo->slicePitch; - break; - case CL_MEM_OBJECT_IMAGE1D_ARRAY: - destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch; - break; - case CL_MEM_OBJECT_IMAGE2D_ARRAY: - destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch; - break; - } + destImageSize = get_image_size(dstImageInfo); } if (destImageSize > dstData.getSize()) @@ -373,7 +356,11 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d log_error( "ERROR: Unable to malloc %lu bytes for dstData\n", destImageSize ); return -1; } + } + if (destImageSize > dstHost.getSize()) + { + dstHost.reset(NULL); dstHost.reset(malloc(destImageSize),NULL,0,destImageSize); if (dstHost == NULL) { dstData.reset(NULL); @@ -560,59 +547,19 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d { if( memcmp( 
sourcePtr, destPtr, scanlineSize ) != 0 ) { - log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)dstImageInfo->width, (int)dstImageInfo->height, (int)dstImageInfo->depth, (int)dstImageInfo->rowPitch, (int)dstImageInfo->rowPitch - (int)dstImageInfo->width * (int)get_pixel_size( dstImageInfo->format ) ); - - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( dstImageInfo->format ); - size_t where = 0; - for( where = 0; where < dstImageInfo->width; where++ ) - if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - log_error( "Failed at column: %ld ", where ); - switch( pixel_size ) + size_t where = + compare_scanlines(dstImageInfo, sourcePtr, destPtr); + + if (where < dstImageInfo->width) { - case 1: - log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] ); - break; - case 2: - log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] ); - break; - case 3: - log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n", - ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2], - ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2] - ); - break; - case 4: - log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] ); - break; - case 6: - log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. 
{0x%4.4x, 0x%4.4x, 0x%4.4x}\n", - ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2], - ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2] - ); - break; - case 8: - log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] ); - break; - case 12: - log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n", - ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], - ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2] - ); - break; - case 16: - log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n", - ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3], - ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3] - ); - break; - default: - log_error( "Don't know how to print pixel size of %ld\n", pixel_size ); - break; + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, dstImageInfo, y, + dstImageInfo->depth); + return -1; } - - return -1; } sourcePtr += rowPitch; if((dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D)) @@ -632,5 +579,14 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d return error; } + // Ensure the unmap call completes. 
+ error = clFinish(queue); + if (error != CL_SUCCESS) + { + log_error("ERROR: clFinish() failed to return CL_SUCCESS: %s\n", + IGetErrorString(error)); + return error; + } + return 0; } diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp index 03f34be7bb..6ee1e536ad 100644 --- a/test_conformance/images/clCopyImage/test_loops.cpp +++ b/test_conformance/images/clCopyImage/test_loops.cpp @@ -105,25 +105,14 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q int ret = 0; // Grab the list of supported image formats for integer reads - cl_image_format *formatList; - bool *filterFlags; - unsigned int numFormats; + std::vector formatList; + if (get_format_list(context, imageType, formatList, flags)) return -1; - if( get_format_list( context, imageType, formatList, numFormats, flags ) ) - return -1; - - filterFlags = new bool[ numFormats ]; - if( filterFlags == NULL ) - { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; - } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); - - filter_formats(formatList, filterFlags, numFormats, NULL); + std::vector filterFlags(formatList.size(), false); + filter_formats(formatList, filterFlags, nullptr); // Run the format list - for( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { int test_return = 0; if( filterFlags[i] ) @@ -168,9 +157,6 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q ret += test_return; } - delete filterFlags; - delete formatList; - return ret; } diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp index c598939251..6cd6beb0ea 100644 --- a/test_conformance/images/clFillImage/test_fill_generic.cpp +++ b/test_conformance/images/clFillImage/test_fill_generic.cpp @@ -468,69 +468,19 @@ int test_fill_image_generic( cl_context 
context, cl_command_queue queue, image_d { for ( size_t y = 0; y < secondDim; y++ ) { - // If the data type is 101010 ignore bits 31 and 32 when comparing the row - if (imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010) { - for (size_t w=0;w!=scanlineSize/4;++w) { - ((cl_uint*)sourcePtr)[w] &= 0x3FFFFFFF; - ((cl_uint*)destPtr)[w] &= 0x3FFFFFFF; - } - } - if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0) { - log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)imageInfo->width, (int)imageInfo->height, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)get_pixel_size( imageInfo->format ) ); - - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( imageInfo->format ); - size_t where = 0; - for ( where = 0; where < imageInfo->width; where++ ) - if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - log_error( "Failed at column: %ld ", where ); - switch ( pixel_size ) + size_t where = compare_scanlines(imageInfo, sourcePtr, destPtr); + + if (where < imageInfo->width) { - case 1: - log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] ); - break; - case 2: - log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] ); - break; - case 3: - log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n", - ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2], - ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2] - ); - break; - case 4: - log_error( "*0x%8.8x vs. 
0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] ); - break; - case 6: - log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n", - ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2], - ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2] - ); - break; - case 8: - log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] ); - break; - case 12: - log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n", - ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], - ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2] - ); - break; - case 16: - log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. 
{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n", - ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3], - ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3] - ); - break; - default: - log_error( "Don't know how to print pixel size of %ld\n", pixel_size ); - break; + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, imageInfo, y, thirdDim); + return -1; } - - return -1; } total_matched += scanlineSize; diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp index 3ab696ef12..759f48d2f3 100644 --- a/test_conformance/images/clFillImage/test_loops.cpp +++ b/test_conformance/images/clFillImage/test_loops.cpp @@ -69,35 +69,22 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q int ret = 0; // Grab the list of supported image formats - cl_image_format *formatList; - bool *filterFlags; - unsigned int numFormats; - - if ( get_format_list( context, imageType, formatList, numFormats, flags ) ) - return -1; - - filterFlags = new bool[ numFormats ]; - if ( filterFlags == NULL ) - { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; - } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); + std::vector formatList; + if (get_format_list(context, imageType, formatList, flags)) return -1; for (auto test : imageTestTypes) { if (gTypesToTest & test.type) { - if (filter_formats(formatList, filterFlags, numFormats, - test.channelTypes) - == 0) + std::vector filterFlags(formatList.size(), false); + if (filter_formats(formatList, filterFlags, test.channelTypes) == 0) { log_info("No formats supported for %s type\n", test.name); } else { 
// Run the format list - for (unsigned int i = 0; i < numFormats; i++) + for (unsigned int i = 0; i < formatList.size(); i++) { if (filterFlags[i]) { @@ -125,9 +112,6 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q } } - delete[] filterFlags; - delete[] formatList; - return ret; } diff --git a/test_conformance/images/clGetInfo/test_loops.cpp b/test_conformance/images/clGetInfo/test_loops.cpp index 0abb14bf99..17f02d8b46 100644 --- a/test_conformance/images/clGetInfo/test_loops.cpp +++ b/test_conformance/images/clGetInfo/test_loops.cpp @@ -29,28 +29,14 @@ int test_image_type( cl_device_id device, cl_context context, cl_mem_object_type int ret = 0; // Grab the list of supported image formats for integer reads - cl_image_format *formatList; - bool *filterFlags; - unsigned int numFormats; + std::vector formatList; + if (get_format_list(context, image_type, formatList, flags)) return -1; - if ( get_format_list( context, image_type, formatList, numFormats, flags ) ) - return -1; - - BufferOwningPtr formatListBuf(formatList); - - filterFlags = new bool[ numFormats ]; - BufferOwningPtr filterFlagsBuf(filterFlags); - - if( filterFlags == NULL ) - { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; - } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); - filter_formats( formatList, filterFlags, numFormats, 0 ); + std::vector filterFlags(formatList.size(), false); + filter_formats(formatList, filterFlags, nullptr); // Run the format list - for( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { int test_return = 0; if( filterFlags[i] ) diff --git a/test_conformance/images/clReadWriteImage/test_loops.cpp b/test_conformance/images/clReadWriteImage/test_loops.cpp index f0690e186c..782e4b37d5 100644 --- a/test_conformance/images/clReadWriteImage/test_loops.cpp +++ b/test_conformance/images/clReadWriteImage/test_loops.cpp @@ -16,11 +16,23 @@ #include 
"../testBase.h" #include "../common.h" -extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ); -extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ); -extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ); -extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ); -extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ); +extern int test_read_image_set_1D(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, cl_mem_flags flags); +extern int test_read_image_set_2D(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, cl_mem_flags flags); +extern int test_read_image_set_3D(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, cl_mem_flags flags); +extern int test_read_image_set_1D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, + cl_mem_flags flags); +extern int test_read_image_set_2D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, + cl_mem_flags flags); int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type imageType, cl_mem_flags flags ) { @@ -28,80 +40,76 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q int ret = 0; - // Grab the list of supported image formats for integer reads - cl_image_format *formatList; - bool *filterFlags; - unsigned int numFormats; - - if ( gTestMipmaps ) - { - if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" )) + if (gTestMipmaps) { - log_info( 
"-----------------------------------------------------\n" ); - log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" ); - log_info( "-----------------------------------------------------\n\n" ); - return 0; + if (0 == is_extension_available(device, "cl_khr_mipmap_image")) + { + log_info("-----------------------------------------------------\n"); + log_info("This device does not support " + "cl_khr_mipmap_image.\nSkipping mipmapped image test. \n"); + log_info( + "-----------------------------------------------------\n\n"); + return 0; + } } - } - if( get_format_list( context, imageType, formatList, numFormats, flags ) ) - return -1; + // Grab the list of supported image formats for integer reads + std::vector formatList; + if (get_format_list(context, imageType, formatList, flags)) return -1; - filterFlags = new bool[ numFormats ]; - if( filterFlags == NULL ) - { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; - } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); - filter_formats( formatList, filterFlags, numFormats, 0 ); + std::vector filterFlags(formatList.size(), false); + filter_formats(formatList, filterFlags, nullptr); // Run the format list - for( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { int test_return = 0; - if( filterFlags[i] ) + if (filterFlags[i]) { - log_info( "NOT RUNNING: " ); - print_header( &formatList[ i ], false ); + log_info("NOT RUNNING: "); + print_header(&formatList[i], false); continue; } - print_header( &formatList[ i ], false ); + print_header(&formatList[i], false); gTestCount++; - switch (imageType) { + switch (imageType) + { case CL_MEM_OBJECT_IMAGE1D: - test_return = test_read_image_set_1D( device, context, queue, &formatList[ i ] ); + test_return = test_read_image_set_1D(device, context, queue, + &formatList[i], flags); break; case CL_MEM_OBJECT_IMAGE2D: - test_return = test_read_image_set_2D( 
device, context, queue, &formatList[ i ] ); + test_return = test_read_image_set_2D(device, context, queue, + &formatList[i], flags); break; case CL_MEM_OBJECT_IMAGE3D: - test_return = test_read_image_set_3D( device,context, queue, &formatList[ i ] ); + test_return = test_read_image_set_3D(device, context, queue, + &formatList[i], flags); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: - test_return = test_read_image_set_1D_array( device, context, queue, &formatList[ i ] ); + test_return = test_read_image_set_1D_array( + device, context, queue, &formatList[i], flags); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - test_return = test_read_image_set_2D_array( device, context, queue, &formatList[ i ] ); + test_return = test_read_image_set_2D_array( + device, context, queue, &formatList[i], flags); break; } - if (test_return) { + if (test_return) + { gFailCount++; - log_error( "FAILED: " ); - print_header( &formatList[ i ], true ); - log_info( "\n" ); + log_error("FAILED: "); + print_header(&formatList[i], true); + log_info("\n"); } ret += test_return; } - delete[] filterFlags; - delete[] formatList; - return ret; } diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index 8f996e8ff5..eef5bf4e56 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -15,7 +15,9 @@ // #include "../testBase.h" -int test_read_image_1D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d ) +int test_read_image_1D(cl_context context, cl_command_queue queue, + image_descriptor *imageInfo, MTdata d, + cl_mem_flags flags) { int error; @@ -34,12 +36,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, image_descri // Construct testing sources if(!gTestMipmaps) { - image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error ); - if( 
image == NULL ) - { - log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) ); - return -1; - } + image = create_image_1d(context, flags, imageInfo->format, + imageInfo->width, 0, NULL, NULL, &error); + if (image == NULL) + { + log_error("ERROR: Unable to create 1D image of size %d (%s)", + (int)imageInfo->width, IGetErrorString(error)); + return -1; + } } else { @@ -48,7 +52,8 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, image_descri image_desc.image_width = imageInfo->width; image_desc.num_mip_levels = imageInfo->num_mip_levels; - image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + image = clCreateImage(context, flags, imageInfo->format, &image_desc, NULL, + &error); if( error != CL_SUCCESS ) { log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); @@ -158,7 +163,9 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, image_descri return 0; } -int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ) +int test_read_image_set_1D(cl_device_id device, cl_context context, + cl_command_queue queue, cl_image_format *format, + cl_mem_flags flags) { size_t maxWidth; cl_ulong maxAllocSize, memSize; @@ -192,7 +199,8 @@ int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d\n", (int)imageInfo.width ); - int ret = test_read_image_1D( context, queue, &imageInfo, seed ); + int ret = + test_read_image_1D(context, queue, &imageInfo, seed, flags); if( ret ) return -1; } @@ -216,7 +224,7 @@ int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_ log_info("Testing %d\n", (int)imageInfo.width); if( gDebugTrace ) 
log_info( " at max size %d\n", (int)maxWidth ); - if( test_read_image_1D( context, queue, &imageInfo, seed ) ) + if (test_read_image_1D(context, queue, &imageInfo, seed, flags)) return -1; } } @@ -252,7 +260,8 @@ int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); - int ret = test_read_image_1D( context, queue, &imageInfo, seed ); + int ret = + test_read_image_1D(context, queue, &imageInfo, seed, flags); if( ret ) return -1; } diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp index ad0444d7ad..5d5c288306 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -15,7 +15,9 @@ // #include "../testBase.h" -int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d ) +int test_read_image_1D_array(cl_context context, cl_command_queue queue, + image_descriptor *imageInfo, MTdata d, + cl_mem_flags flags) { int error; @@ -35,7 +37,9 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_ // Construct testing sources if(!gTestMipmaps) { - image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error ); + image = create_image_1d_array(context, flags, imageInfo->format, + imageInfo->width, imageInfo->arraySize, 0, + 0, NULL, &error); if( image == NULL ) { log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) ); @@ -50,7 +54,8 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_ image_desc.image_array_size = imageInfo->arraySize; 
image_desc.num_mip_levels = imageInfo->num_mip_levels; - image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + image = clCreateImage(context, flags, imageInfo->format, &image_desc, + NULL, &error); if( error != CL_SUCCESS ) { log_error( "ERROR: Unable to create %d level mipmapped 1D image of width %d and array size %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) ); @@ -164,7 +169,9 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_ return 0; } -int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ) +int test_read_image_set_1D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, cl_mem_flags flags) { size_t maxWidth, maxArraySize; cl_ulong maxAllocSize, memSize; @@ -201,7 +208,8 @@ int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_co if( gDebugTrace ) log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); - int ret = test_read_image_1D_array( context, queue, &imageInfo, seed ); + int ret = test_read_image_1D_array(context, queue, &imageInfo, + seed, flags); if( ret ) return -1; } @@ -228,7 +236,8 @@ int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_co log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize); if( gDebugTrace ) log_info( " at max size %d,%d\n", (int)maxWidth, (int)maxArraySize ); - if( test_read_image_1D_array( context, queue, &imageInfo, seed ) ) + if (test_read_image_1D_array(context, queue, &imageInfo, seed, + flags)) return -1; } } @@ -266,7 +275,8 @@ int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_co if( gDebugTrace ) log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, 
(int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); - int ret = test_read_image_1D_array( context, queue, &imageInfo, seed ); + int ret = test_read_image_1D_array(context, queue, &imageInfo, seed, + flags); if( ret ) return -1; } diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp index 7c2050350e..fb2e794853 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -15,7 +15,9 @@ // #include "../testBase.h" -int test_read_image_2D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d ) +int test_read_image_2D(cl_context context, cl_command_queue queue, + image_descriptor *imageInfo, MTdata d, + cl_mem_flags flags) { int error; @@ -35,7 +37,9 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, image_descri // Construct testing sources if(!gTestMipmaps) { - image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error ); + image = + create_image_2d(context, flags, imageInfo->format, imageInfo->width, + imageInfo->height, 0, NULL, &error); if( image == NULL ) { log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( error ) ); @@ -50,7 +54,8 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, image_descri image_desc.image_height = imageInfo->height; image_desc.num_mip_levels = imageInfo->num_mip_levels; - image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + image = clCreateImage(context, flags, imageInfo->format, &image_desc, + NULL, &error); if( error != CL_SUCCESS ) { log_error( "ERROR: Unable to create %d level mipmapped 2D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, 
(int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); @@ -167,7 +172,9 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, image_descri return 0; } -int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ) +int test_read_image_set_2D(cl_device_id device, cl_context context, + cl_command_queue queue, cl_image_format *format, + cl_mem_flags flags) { size_t maxWidth, maxHeight; cl_ulong maxAllocSize, memSize; @@ -203,7 +210,8 @@ int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); - int ret = test_read_image_2D( context, queue, &imageInfo, seed ); + int ret = + test_read_image_2D(context, queue, &imageInfo, seed, flags); if( ret ) return -1; } @@ -229,7 +237,7 @@ int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_ log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height); if( gDebugTrace ) log_info( " at max size %d,%d\n", (int)maxWidth, (int)maxHeight ); - if( test_read_image_2D( context, queue, &imageInfo, seed ) ) + if (test_read_image_2D(context, queue, &imageInfo, seed, flags)) return -1; } } @@ -265,7 +273,8 @@ int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); - int ret = test_read_image_2D( context, queue, &imageInfo, seed ); + int ret = + test_read_image_2D(context, queue, &imageInfo, seed, flags); if( ret ) return -1; } diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp index 6118e69764..d0113bb749 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp +++ 
b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -15,7 +15,9 @@ // #include "../testBase.h" -int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d ) +int test_read_image_2D_array(cl_context context, cl_command_queue queue, + image_descriptor *imageInfo, MTdata d, + cl_mem_flags flags) { int error; @@ -35,7 +37,9 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_ // Construct testing sources if(!gTestMipmaps) { - image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error ); + image = create_image_2d_array(context, flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->arraySize, 0, 0, NULL, &error); if( image == NULL ) { log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) ); @@ -51,7 +55,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_ image_desc.image_array_size = imageInfo->arraySize; image_desc.num_mip_levels = imageInfo->num_mip_levels; - image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + image = clCreateImage(context, flags, imageInfo->format, &image_desc, + NULL, &error); if( error != CL_SUCCESS ) { log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); @@ -142,7 +147,9 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_ return 0; } -int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format 
*format ) +int test_read_image_set_2D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + cl_image_format *format, cl_mem_flags flags) { size_t maxWidth, maxHeight, maxArraySize; cl_ulong maxAllocSize, memSize; @@ -181,7 +188,8 @@ int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_co if( gDebugTrace ) log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); - int ret = test_read_image_2D_array( context, queue, &imageInfo, seed ); + int ret = test_read_image_2D_array(context, queue, + &imageInfo, seed, flags); if( ret ) return -1; } @@ -209,7 +217,8 @@ int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_co imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize); - if( test_read_image_2D_array( context, queue, &imageInfo, seed ) ) + if (test_read_image_2D_array(context, queue, &imageInfo, seed, + flags)) return -1; } } @@ -253,7 +262,8 @@ int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_co if( gDebugTrace ) log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); - int ret = test_read_image_2D_array( context, queue, &imageInfo, seed ); + int ret = test_read_image_2D_array(context, queue, &imageInfo, seed, + flags); if( ret ) return -1; } diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp index 8f21ae94e1..2dcd2433d6 100644 --- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -15,7 +15,9 @@ // #include 
"../testBase.h" -int test_read_image_3D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d ) +int test_read_image_3D(cl_context context, cl_command_queue queue, + image_descriptor *imageInfo, MTdata d, + cl_mem_flags flags) { int error; @@ -34,7 +36,9 @@ int test_read_image_3D( cl_context context, cl_command_queue queue, image_descri // Construct testing sources if(!gTestMipmaps) { - image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error ); + image = create_image_3d(context, flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->depth, 0, 0, NULL, &error); if( image == NULL ) { log_error( "ERROR: Unable to create 2D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) ); @@ -50,7 +54,8 @@ int test_read_image_3D( cl_context context, cl_command_queue queue, image_descri image_desc.image_depth = imageInfo->depth; image_desc.num_mip_levels = imageInfo->num_mip_levels; - image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + image = clCreateImage(context, flags, imageInfo->format, &image_desc, + NULL, &error); if( error != CL_SUCCESS ) { log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); @@ -147,7 +152,9 @@ int test_read_image_3D( cl_context context, cl_command_queue queue, image_descri return 0; } -int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format ) +int test_read_image_set_3D(cl_device_id device, cl_context context, + cl_command_queue queue, cl_image_format *format, + cl_mem_flags flags) { 
size_t maxWidth, maxHeight, maxDepth; cl_ulong maxAllocSize, memSize; @@ -186,7 +193,8 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); - int ret = test_read_image_3D( context, queue, &imageInfo, seed ); + int ret = test_read_image_3D(context, queue, &imageInfo, + seed, flags); if( ret ) return -1; } @@ -214,8 +222,8 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth); - if( test_read_image_3D( context, queue, &imageInfo, seed ) ) - return -1; + if (test_read_image_3D(context, queue, &imageInfo, seed, flags)) + return -1; } } else @@ -257,7 +265,8 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); - int ret = test_read_image_3D( context, queue, &imageInfo, seed ); + int ret = + test_read_image_3D(context, queue, &imageInfo, seed, flags); if( ret ) return -1; } diff --git a/test_conformance/images/common.cpp b/test_conformance/images/common.cpp index a14242efe3..7323f11c1c 100644 --- a/test_conformance/images/common.cpp +++ b/test_conformance/images/common.cpp @@ -58,13 +58,13 @@ std::array imageTestTypes = { { { kTestFloat, kFloat, floatFormats, "float" }, } }; -int filter_formats(cl_image_format *formatList, bool *filterFlags, - unsigned int formatCount, +int filter_formats(const std::vector &formatList, + std::vector &filterFlags, cl_channel_type 
*channelDataTypesToFilter, bool testMipmaps /*=false*/) { int numSupported = 0; - for (unsigned int j = 0; j < formatCount; j++) + for (unsigned int j = 0; j < formatList.size(); j++) { // If this format has been previously filtered, remove the filter if (filterFlags[j]) filterFlags[j] = false; @@ -129,18 +129,18 @@ int filter_formats(cl_image_format *formatList, bool *filterFlags, } int get_format_list(cl_context context, cl_mem_object_type imageType, - cl_image_format *&outFormatList, - unsigned int &outFormatCount, cl_mem_flags flags) + std::vector &outFormatList, + cl_mem_flags flags) { + cl_uint formatCount; int error = clGetSupportedImageFormats(context, flags, imageType, 0, NULL, - &outFormatCount); + &formatCount); test_error(error, "Unable to get count of supported image formats"); - outFormatList = - (outFormatCount > 0) ? new cl_image_format[outFormatCount] : NULL; + outFormatList.resize(formatCount); - error = clGetSupportedImageFormats(context, flags, imageType, - outFormatCount, outFormatList, NULL); + error = clGetSupportedImageFormats(context, flags, imageType, formatCount, + outFormatList.data(), NULL); test_error(error, "Unable to get list of supported image formats"); return 0; } diff --git a/test_conformance/images/common.h b/test_conformance/images/common.h index 7ae2f4fa52..27e8679be4 100644 --- a/test_conformance/images/common.h +++ b/test_conformance/images/common.h @@ -22,6 +22,7 @@ #include "harness/conversions.h" #include +#include extern cl_channel_type gChannelTypeToUse; extern cl_channel_order gChannelOrderToUse; @@ -40,13 +41,13 @@ struct ImageTestTypes extern std::array imageTestTypes; -int filter_formats(cl_image_format *formatList, bool *filterFlags, - unsigned int formatCount, +int filter_formats(const std::vector &formatList, + std::vector &filterFlags, cl_channel_type *channelDataTypesToFilter, bool testMipmaps = false); int get_format_list(cl_context context, cl_mem_object_type imageType, - cl_image_format *&outFormatList, - 
unsigned int &outFormatCount, cl_mem_flags flags); + std::vector &outFormatList, + cl_mem_flags flags); size_t random_in_ranges(size_t minimum, size_t rangeA, size_t rangeB, MTdata d); #endif // IMAGES_COMMON_H diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp index e1320ce398..50653ef55f 100644 --- a/test_conformance/images/kernel_image_methods/main.cpp +++ b/test_conformance/images/kernel_image_methods/main.cpp @@ -23,7 +23,6 @@ bool gDebugTrace; bool gTestSmallImages; bool gTestMaxImages; -bool gDeviceLt20 = false; cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp index 1ea8eb8895..0059d4c203 100644 --- a/test_conformance/images/kernel_image_methods/test_1D.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D.cpp @@ -15,7 +15,6 @@ // #include "../testBase.h" -extern bool gDeviceLt20; struct image_kernel_data { @@ -98,7 +97,8 @@ static int test_get_1Dimage_info_single(cl_context context, if (error) print_error(error, "clFinish failed.\n"); const char *ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create kernel to test against" ); // Create an output buffer diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp index 18c190bb18..797161c427 100644 --- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp @@ -15,7 +15,6 @@ // #include "../testBase.h" -extern bool gDeviceLt20; struct image_kernel_data { @@ -102,7 +101,8 @@ int test_get_1Dimage_array_info_single(cl_context context, if (error) print_error(error, "clFinish failed.\n"); const char *ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create kernel to test against" ); // Create an output buffer diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp index 2ebc546041..b0d4a7086d 100644 --- a/test_conformance/images/kernel_image_methods/test_2D.cpp +++ b/test_conformance/images/kernel_image_methods/test_2D.cpp @@ -15,7 +15,6 @@ // #include "../testBase.h" -extern bool gDeviceLt20; struct image_kernel_data { @@ -133,7 +132,8 @@ int test_get_image_info_single(cl_context context, cl_command_queue queue, if (error) print_error(error, "clFinish failed.\n"); const char *ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create kernel to test against" ); // Create an output buffer diff --git a/test_conformance/images/kernel_image_methods/test_2D_array.cpp b/test_conformance/images/kernel_image_methods/test_2D_array.cpp index 98c1106249..21a6b049d0 100644 --- a/test_conformance/images/kernel_image_methods/test_2D_array.cpp +++ b/test_conformance/images/kernel_image_methods/test_2D_array.cpp @@ -15,7 +15,6 @@ // #include "../testBase.h" -extern bool gDeviceLt20; struct image_kernel_data { @@ -108,7 +107,8 @@ int test_get_2Dimage_array_info_single(cl_context context, if (error) print_error(error, "clFinish failed.\n"); const char *ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create kernel to test against" ); // Create an output buffer @@ -244,6 +244,9 @@ int test_get_image_info_2D_array(cl_device_id device, cl_context context, for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) { cl_ulong size; + cl_ulong slicePitch; + cl_ulong rowPitch; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that // image, the result array, plus offset arrays, will fit in the global ram space do @@ -252,23 +255,26 @@ int test_get_image_info_2D_array(cl_device_id device, cl_context context, imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); - imageInfo.rowPitch = imageInfo.width * pixelSize; - imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + rowPitch = imageInfo.width * pixelSize; + slicePitch = rowPitch * imageInfo.height; size_t extraWidth = 
(int)random_log_in_range( 0, 64, seed ); - imageInfo.rowPitch += extraWidth; + rowPitch += extraWidth; do { extraWidth++; - imageInfo.rowPitch += extraWidth; - } while ((imageInfo.rowPitch % pixelSize) != 0); + rowPitch += extraWidth; + } while ((rowPitch % pixelSize) != 0); size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); - imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + slicePitch = rowPitch * (imageInfo.height + extraHeight); - size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + size = slicePitch * imageInfo.arraySize * 4 * 4; } while( size > maxAllocSize || ( size * 3 ) > memSize ); + imageInfo.slicePitch = slicePitch; + imageInfo.rowPitch = rowPitch; + if( gDebugTrace ) log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); int ret = test_get_2Dimage_array_info_single( diff --git a/test_conformance/images/kernel_image_methods/test_3D.cpp b/test_conformance/images/kernel_image_methods/test_3D.cpp index 287005a511..aae433bd76 100644 --- a/test_conformance/images/kernel_image_methods/test_3D.cpp +++ b/test_conformance/images/kernel_image_methods/test_3D.cpp @@ -105,6 +105,9 @@ int test_get_image_info_3D(cl_device_id device, cl_context context, for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) { cl_ulong size; + cl_ulong slicePitch; + cl_ulong rowPitch; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that // image, the result array, plus offset arrays, will fit in the global ram space do @@ -113,23 +116,26 @@ int test_get_image_info_3D(cl_device_id device, cl_context context, imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); - imageInfo.rowPitch = 
imageInfo.width * pixelSize; - imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + rowPitch = imageInfo.width * pixelSize; + slicePitch = imageInfo.rowPitch * imageInfo.height; size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); - imageInfo.rowPitch += extraWidth; + rowPitch += extraWidth; do { extraWidth++; - imageInfo.rowPitch += extraWidth; - } while ((imageInfo.rowPitch % pixelSize) != 0); + rowPitch += extraWidth; + } while ((rowPitch % pixelSize) != 0); size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); - imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + slicePitch = rowPitch * (imageInfo.height + extraHeight); - size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + size = slicePitch * imageInfo.depth * 4 * 4; } while( size > maxAllocSize || ( size * 3 ) > memSize ); + imageInfo.slicePitch = slicePitch; + imageInfo.rowPitch = rowPitch; + if( gDebugTrace ) log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); int ret = test_get_image_info_single(context, queue, &imageInfo, diff --git a/test_conformance/images/kernel_image_methods/test_loops.cpp b/test_conformance/images/kernel_image_methods/test_loops.cpp index 8dfebd2fcb..1d892a9b0b 100644 --- a/test_conformance/images/kernel_image_methods/test_loops.cpp +++ b/test_conformance/images/kernel_image_methods/test_loops.cpp @@ -16,7 +16,6 @@ #include "../testBase.h" #include "../common.h" -extern bool gDeviceLt20; extern int test_get_image_info_1D(cl_device_id device, cl_context context, cl_command_queue queue, @@ -43,24 +42,14 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q int ret = 0; // Grab the list of supported image formats for integer reads - cl_image_format *formatList; - bool *filterFlags; - unsigned int 
numFormats; + std::vector formatList; + if (get_format_list(context, imageType, formatList, flags)) return -1; - if( get_format_list( context, imageType, formatList, numFormats, flags ) ) - return -1; - - filterFlags = new bool[ numFormats ]; - if( filterFlags == NULL ) - { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; - } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); - filter_formats( formatList, filterFlags, numFormats, 0 ); + std::vector filterFlags(formatList.size(), false); + filter_formats(formatList, filterFlags, nullptr); // Run the format list - for( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { int test_return = 0; if( filterFlags[i] ) @@ -107,9 +96,6 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q ret += test_return; } - delete filterFlags; - delete formatList; - return ret; } @@ -117,9 +103,6 @@ int test_image_set( cl_device_id device, cl_context context, cl_command_queue qu { int version_check; auto version = get_device_cl_version(device); - if (version < Version(2, 0)) { - gDeviceLt20 = true; - } if ((version_check = (version < Version(1, 2)))) { diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp index f430c7f57c..31dceb33a7 100644 --- a/test_conformance/images/kernel_read_write/main.cpp +++ b/test_conformance/images/kernel_read_write/main.cpp @@ -35,7 +35,6 @@ bool gTestSmallImages; bool gTestMaxImages; bool gTestImage2DFromBuffer; bool gTestMipmaps; -bool gDeviceLt20 = false; cl_filter_mode gFilterModeToUse = (cl_filter_mode)-1; // Default is CL_MEM_USE_HOST_PTR for the test cl_mem_flags gMemFlagsToUse = CL_MEM_USE_HOST_PTR; @@ -107,10 +106,6 @@ static int doTest( cl_device_id device, cl_context context, cl_command_queue que bool tDisableOffsets = false; bool tNormalizedModeToUse = false; cl_filter_mode tFilterModeToUse = (cl_filter_mode)-1; - auto 
version = get_device_cl_version(device); - if (version < Version(2, 0)) { - gDeviceLt20 = true; - } if( testTypesToRun & kReadTests ) { diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 5182601b86..6b3cf849da 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -1,6 +1,23 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// #include "test_common.h" +#include + cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) { cl_sampler sampler = nullptr; if (test_mipmaps) { @@ -17,3 +34,1549 @@ cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool te return sampler; } +void InitFloatCoordsCommon(image_descriptor *imageInfo, + image_sampler_data *imageSampler, float *xOffsets, + float *yOffsets, float *zOffsets, float xfract, + float yfract, float zfract, int normalized_coords, + MTdata d, int lod) +{ + size_t i = 0; + if (gDisableOffsets) + { + for (size_t z = 0; z < imageInfo->depth; z++) + { + for (size_t y = 0; y < imageInfo->height; y++) + { + for (size_t x = 0; x < imageInfo->width; x++, i++) + { + xOffsets[i] = (float)(xfract + (double)x); + yOffsets[i] = (float)(yfract + (double)y); + zOffsets[i] = (float)(zfract + (double)z); + } + } + } + } + else + { + for (size_t z = 0; z < imageInfo->depth; z++) + { + for (size_t y = 0; y < imageInfo->height; y++) + { + for (size_t x = 0; x < imageInfo->width; x++, i++) + { + xOffsets[i] = + (float)(xfract + + (double)((int)x + + random_in_range(-10, 10, d))); + yOffsets[i] = + (float)(yfract + + (double)((int)y + + random_in_range(-10, 10, d))); + zOffsets[i] = + (float)(zfract + + (double)((int)z + + random_in_range(-10, 10, d))); + } + } + } + } + + if (imageSampler->addressing_mode == CL_ADDRESS_NONE) + { + i = 0; + for (size_t z = 0; z < imageInfo->depth; z++) + { + for (size_t y = 0; y < imageInfo->height; y++) + { + for (size_t x = 0; x < imageInfo->width; x++, i++) + { + xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0, + (double)imageInfo->width - 1.0); + yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, + (double)imageInfo->height - 1.0); + zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, + (double)imageInfo->depth - 1.0); + } + } + } + } + + if (normalized_coords || gTestMipmaps) + { + i = 0; + if (lod == 0) + { + for (size_t z = 0; z < 
imageInfo->depth; z++) + { + for (size_t y = 0; y < imageInfo->height; y++) + { + for (size_t x = 0; x < imageInfo->width; x++, i++) + { + xOffsets[i] = (float)((double)xOffsets[i] + / (double)imageInfo->width); + yOffsets[i] = (float)((double)yOffsets[i] + / (double)imageInfo->height); + zOffsets[i] = (float)((double)zOffsets[i] + / (double)imageInfo->depth); + } + } + } + } + else if (gTestMipmaps) + { + size_t width_lod, height_lod, depth_lod; + + width_lod = + (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height_lod = + (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + depth_lod = + (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1; + + for (size_t z = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, i++) + { + xOffsets[i] = + (float)((double)xOffsets[i] / (double)width_lod); + yOffsets[i] = + (float)((double)yOffsets[i] / (double)height_lod); + zOffsets[i] = + (float)((double)zOffsets[i] / (double)depth_lod); + } + } + } + } + } +} + +int test_read_image(cl_context context, cl_command_queue queue, + cl_kernel kernel, image_descriptor *imageInfo, + image_sampler_data *imageSampler, bool useFloatCoords, + ExplicitType outputType, MTdata d) +{ + int error; + size_t threads[3]; + static int initHalf = 0; + + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + + clMemWrapper xOffsets, yOffsets, zOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr maxImageUseHostPtrBackingStore; + + // Create offset data + BufferOwningPtr xOffsetValues( + malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height + * imageInfo->depth)); + BufferOwningPtr yOffsetValues( + malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height + * imageInfo->depth)); + BufferOwningPtr zOffsetValues( + malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height + * imageInfo->depth)); + + if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) + if 
(DetectFloatToHalfRoundingMode(queue)) return 1; + + BufferOwningPtr imageValues; + generate_random_image_data(imageInfo, imageValues, d); + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if (gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + } + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the + // testing (via Ian) Do not use protected images for max image size test + // since it rounds the row size to a page size + if (gTestMaxImages) + { + generate_random_image_data(imageInfo, + maxImageUseHostPtrBackingStore, d); + unprotImage = create_image_3d( + context, image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, imageInfo->width, imageInfo->height, + imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + (gEnablePitch ? imageInfo->slicePitch : 0), + maxImageUseHostPtrBackingStore, &error); + } + else + { + error = protImage.Create(context, image_read_write_flags, + imageInfo->format, imageInfo->width, + imageInfo->height, imageInfo->depth); + } + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + "(pitch %d, %d ) (%s)", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth, (int)imageInfo->rowPitch, + (int)imageInfo->slicePitch, IGetErrorString(error)); + return error; + } + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR) + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data + // in + unprotImage = create_image_3d( + context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, imageInfo->width, imageInfo->height, + imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + (gEnablePitch ? 
imageInfo->slicePitch : 0), imageValues, &error); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + "(pitch %d, %d ) (%s)", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth, (int)imageInfo->rowPitch, + (int)imageInfo->slicePitch, IGetErrorString(error)); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can + // be accessed by the host, but otherwise it works just as if no flag is + // specified, so we just do the same thing either way + if (!gTestMipmaps) + { + unprotImage = create_image_3d( + context, image_read_write_flags | gMemFlagsToUse, + imageInfo->format, imageInfo->width, imageInfo->height, + imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, + &error); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to create 3D image of size %d x %d x " + "%d (pitch %d, %d ) (%s)", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth, (int)imageInfo->rowPitch, + (int)imageInfo->slicePitch, IGetErrorString(error)); + return error; + } + image = unprotImage; + } + else + { + cl_image_desc image_desc = { 0 }; + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_depth = imageInfo->depth; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + + unprotImage = + clCreateImage(context, image_read_write_flags, + imageInfo->format, &image_desc, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to create %d level mipmapped 3D image " + "of size %d x %d x %d (pitch %d, %d ) (%s)", + (int)imageInfo->num_mip_levels, (int)imageInfo->width, + (int)imageInfo->height, (int)imageInfo->depth, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + 
IGetErrorString(error)); + return error; + } + image = unprotImage; + } + } + + if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR) + { + size_t origin[4] = { 0, 0, 0, 0 }; + size_t region[3] = { imageInfo->width, imageInfo->height, + imageInfo->depth }; + + if (gDebugTrace) log_info(" - Writing image...\n"); + + if (!gTestMipmaps) + { + + error = + clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, + gEnablePitch ? imageInfo->rowPitch : 0, + gEnablePitch ? imageInfo->slicePitch : 0, + imageValues, 0, NULL, NULL); + + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to write to 3D image of size %d x %d " + "x %d \n", + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth); + return error; + } + } + else + { + int nextLevelOffset = 0; + + for (int i = 0; i < imageInfo->num_mip_levels; i++) + { + origin[3] = i; + error = clEnqueueWriteImage( + queue, image, CL_TRUE, origin, region, + /*gEnablePitch ? imageInfo->rowPitch :*/ 0, + /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + ((char *)imageValues + nextLevelOffset), 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to write to %d level mipmapped 3D " + "image of size %d x %d x %d\n", + (int)imageInfo->num_mip_levels, + (int)imageInfo->width, (int)imageInfo->height, + (int)imageInfo->depth); + return error; + } + nextLevelOffset += region[0] * region[1] * region[2] + * get_pixel_size(imageInfo->format); + // Subsequent mip level dimensions keep halving + region[0] = region[0] >> 1 ? region[0] >> 1 : 1; + region[1] = region[1] >> 1 ? region[1] >> 1 : 1; + region[2] = region[2] >> 1 ? 
region[2] >> 1 : 1; + } + } + } + + xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * imageInfo->width + * imageInfo->height * imageInfo->depth, + xOffsetValues, &error); + test_error(error, "Unable to create x offset buffer"); + yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * imageInfo->width + * imageInfo->height * imageInfo->depth, + yOffsetValues, &error); + test_error(error, "Unable to create y offset buffer"); + zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * imageInfo->width + * imageInfo->height * imageInfo->depth, + zOffsetValues, &error); + test_error(error, "Unable to create y offset buffer"); + results = + clCreateBuffer(context, CL_MEM_READ_WRITE, + get_explicit_type_size(outputType) * 4 * imageInfo->width + * imageInfo->height * imageInfo->depth, + NULL, &error); + test_error(error, "Unable to create result buffer"); + + // Create sampler to use + actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); + test_error(error, "Unable to create image sampler"); + + // Set arguments + int idx = 0; + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image); + test_error(error, "Unable to set kernel arguments"); + if (!gUseKernelSamplers) + { + error = + clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler); + test_error(error, "Unable to set kernel arguments"); + } + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets); + test_error(error, "Unable to set kernel arguments"); + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets); + test_error(error, "Unable to set kernel arguments"); + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets); + test_error(error, "Unable to set kernel arguments"); + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results); + test_error(error, "Unable to set kernel arguments"); + + const float float_offsets[] = { 0.0f, + MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), + 
0.25f, + 0.3f, + 0.5f - FLT_EPSILON / 4.0f, + 0.5f, + 0.9f, + 1.0f - FLT_EPSILON / 2 }; + int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if (!useFloatCoords) loopCount = 1; + if (gTestMaxImages) + { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max " + "sized images.\n", + float_offsets[0]); + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = + get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) + log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf + && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) + { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue); + if (initHalf) + { + log_info("Half rounding mode successfully detected.\n"); + } + } + + int nextLevelOffset = 0; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height, + depth_lod = imageInfo->depth; + + // Loop over all mipmap levels, if we are testing mipmapped images. + for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) + || (!gTestMipmaps && lod < 1); + lod++) + { + size_t resultValuesSize = width_lod * height_lod * depth_lod + * get_explicit_type_size(outputType) * 4; + BufferOwningPtr resultValues(malloc(resultValuesSize)); + float lod_float = (float)lod; + if (gTestMipmaps) + { + // Set the lod kernel arg + if (gDebugTrace) log_info(" - Working at mip level %d\n", lod); + error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float); + test_error(error, "Unable to set kernel arguments"); + } + + for (int q = 0; q < loopCount; q++) + { + float offset = float_offsets[q % float_offset_count]; + + // Init the coordinates + InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues, + yOffsetValues, zOffsetValues, + q >= float_offset_count ? -offset : offset, + q >= float_offset_count ? 
offset : -offset, + q >= float_offset_count ? -offset : offset, + imageSampler->normalized_coords, d, lod); + + error = + clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, + sizeof(cl_float) * imageInfo->height + * imageInfo->width * imageInfo->depth, + xOffsetValues, 0, NULL, NULL); + test_error(error, "Unable to write x offsets"); + error = + clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, + sizeof(cl_float) * imageInfo->height + * imageInfo->width * imageInfo->depth, + yOffsetValues, 0, NULL, NULL); + test_error(error, "Unable to write y offsets"); + error = + clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, + sizeof(cl_float) * imageInfo->height + * imageInfo->width * imageInfo->depth, + zOffsetValues, 0, NULL, NULL); + test_error(error, "Unable to write z offsets"); + + + memset(resultValues, 0xff, resultValuesSize); + clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize, + resultValues, 0, NULL, NULL); + + // Figure out thread dimensions + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + threads[2] = (size_t)depth_lod; + + // Run the kernel + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, + NULL, 0, NULL, NULL); + test_error(error, "Unable to run kernel"); + + // Get results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, + width_lod * height_lod * depth_lod + * get_explicit_type_size(outputType) + * 4, + resultValues, 0, NULL, NULL); + test_error(error, "Unable to read results from kernel"); + if (gDebugTrace) log_info(" results read\n"); + + // Validate results element by element + char *imagePtr = (char *)imageValues + nextLevelOffset; + /* + * FLOAT output type + */ + if (is_sRGBA_order(imageInfo->format->image_channel_order) + && (outputType == kFloat)) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error = 0.0f; + float maxErr = get_max_relative_error( + imageInfo->format, imageSampler, 1 /*3D*/, + CL_FILTER_LINEAR == 
imageSampler->filter_mode); + + for (size_t z = 0, j = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, j++) + { + // Step 1: go through and see if the results verify + // for the pixel For the normalized case on a GPU we + // put in offsets to the X, Y and Z to see if we + // land on the right pixel. This addresses the + // significant inaccuracy in GPU normalization in + // OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords + || imageSampler->filter_mode + != CL_FILTER_NEAREST + || NORM_OFFSET == 0 +#if defined(__APPLE__) + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all + // modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; + norm_offset_x <= offset && !found_pixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset && !found_pixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= NORM_OFFSET + && !found_pixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, 0, + &hasDenormals, lod); + + float err1 = + ABS_ERROR(sRGBmap(resultPtr[0]), + sRGBmap(expected[0])); + float err2 = + ABS_ERROR(sRGBmap(resultPtr[1]), + sRGBmap(expected[1])); + float err3 = + ABS_ERROR(sRGBmap(resultPtr[2]), + sRGBmap(expected[2])); + float err4 = ABS_ERROR(resultPtr[3], + expected[3]); + // Clamp to the minimum absolute error + // for the format + if (err1 > 0 + && err1 < formatAbsoluteError) + { + err1 = 0.0f; + } + if (err2 > 0 + && err2 < formatAbsoluteError) + { + err2 = 0.0f; + } + if (err3 > 0 
+ && err3 < formatAbsoluteError) + { + err3 = 0.0f; + } + if (err4 > 0 + && err4 < formatAbsoluteError) + { + err4 = 0.0f; + } + float maxErr = 0.5; + + if (!(err1 <= maxErr) + || !(err2 <= maxErr) + || !(err3 <= maxErr) + || !(err4 <= maxErr)) + { + // Try flushing the denormals + if (hasDenormals) + { + // If implementation decide to + // flush subnormals to zero, max + // error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, expected, + 0, NULL, lod); + + err1 = ABS_ERROR( + sRGBmap(resultPtr[0]), + sRGBmap(expected[0])); + err2 = ABS_ERROR( + sRGBmap(resultPtr[1]), + sRGBmap(expected[1])); + err3 = ABS_ERROR( + sRGBmap(resultPtr[2]), + sRGBmap(expected[2])); + err4 = ABS_ERROR(resultPtr[3], + expected[3]); + } + } + + found_pixel = (err1 <= maxErr) + && (err2 <= maxErr) + && (err3 <= maxErr) + && (err4 <= maxErr); + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + + // Step 2: If we did not find a match, then print + // out debugging info. + if (!found_pixel) + { + // For the normalized case on a GPU we put in + // offsets to the X and Y to see if we land on + // the right pixel. This addresses the + // significant inaccuracy in GPU normalization + // in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; + norm_offset_x <= offset + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset + && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= offset + && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, imageSampler, + expected, 0, &hasDenormals, + lod); + + float err1 = + ABS_ERROR(sRGBmap(resultPtr[0]), + sRGBmap(expected[0])); + float err2 = + ABS_ERROR(sRGBmap(resultPtr[1]), + sRGBmap(expected[1])); + float err3 = + ABS_ERROR(sRGBmap(resultPtr[2]), + sRGBmap(expected[2])); + float err4 = ABS_ERROR(resultPtr[3], + expected[3]); + float maxErr = 0.6; + + if (!(err1 <= maxErr) + || !(err2 <= maxErr) + || !(err3 <= maxErr) + || !(err4 <= maxErr)) + { + // Try flushing the denormals + if (hasDenormals) + { + // If implementation decide + // to flush subnormals to + // zero, max error needs to + // be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + imageSampler, + expected, 0, NULL, + lod); + + err1 = ABS_ERROR( + sRGBmap(resultPtr[0]), + sRGBmap(expected[0])); + err2 = ABS_ERROR( + sRGBmap(resultPtr[1]), + sRGBmap(expected[1])); + err3 = ABS_ERROR( + sRGBmap(resultPtr[2]), + sRGBmap(expected[2])); + err4 = + ABS_ERROR(resultPtr[3], + expected[3]); + } + } + + if (!(err1 <= maxErr) + || !(err2 <= maxErr) + || !(err3 <= maxErr) + || !(err4 <= maxErr)) + { + log_error( + "FAILED norm_offsets: %g , " + "%g , %g:\n", + norm_offset_x, + norm_offset_y, + norm_offset_z); + + float tempOut[4]; + shouldReturn |= + 
determine_validation_error_offset< + float>( + imagePtr, imageInfo, + imageSampler, resultPtr, + expected, error, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, j, + numTries, numClamped, + true, lod); + log_error("Step by step:\n"); + FloatPixel temp = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); + log_error( + "\tulps: %2.2f, %2.2f, " + "%2.2f, %2.2f (max " + "allowed: %2.2f)\n\n", + Ulp_Error(resultPtr[0], + expected[0]), + Ulp_Error(resultPtr[1], + expected[1]), + Ulp_Error(resultPtr[2], + expected[2]), + Ulp_Error(resultPtr[3], + expected[3]), + Ulp_Error( + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24) + + maxErr, + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24))); + } + else + { + log_error( + "Test error: we should " + "have detected this " + "passing above.\n"); + } + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + if (shouldReturn) return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * FLOAT output type + */ + else if (outputType == kFloat) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error = 0.0f; + float maxErr = get_max_relative_error( + imageInfo->format, imageSampler, 1 /*3D*/, + CL_FILTER_LINEAR == imageSampler->filter_mode); + + for (size_t z = 0, j = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, j++) + { + // Step 1: go through and see if the results verify + // for the pixel For the normalized case on a GPU we + // put in offsets to the X, Y and Z to see if we + // land on the right pixel. This addresses the + // significant inaccuracy in GPU normalization in + // OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords + || imageSampler->filter_mode + != CL_FILTER_NEAREST + || NORM_OFFSET == 0 +#if defined(__APPLE__) + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all + // modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; + norm_offset_x <= offset && !found_pixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset && !found_pixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= NORM_OFFSET + && !found_pixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, 0, + &hasDenormals, lod); + + float err1 = ABS_ERROR(resultPtr[0], + expected[0]); + float err2 = ABS_ERROR(resultPtr[1], + expected[1]); + float err3 = ABS_ERROR(resultPtr[2], + expected[2]); + float err4 = ABS_ERROR(resultPtr[3], + expected[3]); + // Clamp to the minimum absolute error + // for the format + if (err1 > 0 + && err1 < formatAbsoluteError) + { + err1 = 0.0f; + } + if (err2 > 0 + && err2 < formatAbsoluteError) + { + err2 = 0.0f; + } + if (err3 > 0 + && err3 < formatAbsoluteError) + { + err3 = 0.0f; + } + if (err4 > 0 + && err4 < formatAbsoluteError) + { + err4 = 0.0f; + } + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); + + if (!(err1 <= maxErr1) + || !(err2 <= maxErr2) + || !(err3 <= maxErr3) + || !(err4 <= maxErr4)) + { + // Try flushing the 
denormals + if (hasDenormals) + { + // If implementation decide to + // flush subnormals to zero, max + // error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, expected, + 0, NULL, lod); + + err1 = ABS_ERROR(resultPtr[0], + expected[0]); + err2 = ABS_ERROR(resultPtr[1], + expected[1]); + err3 = ABS_ERROR(resultPtr[2], + expected[2]); + err4 = ABS_ERROR(resultPtr[3], + expected[3]); + } + } + + found_pixel = (err1 <= maxErr1) + && (err2 <= maxErr2) + && (err3 <= maxErr3) + && (err4 <= maxErr4); + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + + // Step 2: If we did not find a match, then print + // out debugging info. + if (!found_pixel) + { + // For the normalized case on a GPU we put in + // offsets to the X and Y to see if we land on + // the right pixel. This addresses the + // significant inaccuracy in GPU normalization + // in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; + norm_offset_x <= offset + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset + && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= offset + && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, imageSampler, + expected, 0, &hasDenormals, + lod); + + float err1 = ABS_ERROR(resultPtr[0], + expected[0]); + float err2 = ABS_ERROR(resultPtr[1], + expected[1]); + float err3 = ABS_ERROR(resultPtr[2], + expected[2]); + float err4 = ABS_ERROR(resultPtr[3], + expected[3]); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], + FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], + FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], + FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], + FLT_MIN); + + + if (!(err1 <= maxErr1) + || !(err2 <= maxErr2) + || !(err3 <= maxErr3) + || !(err4 <= maxErr4)) + { + // Try flushing the denormals + if (hasDenormals) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + imageSampler, + expected, 0, NULL, + lod); + + err1 = + ABS_ERROR(resultPtr[0], + expected[0]); + err2 = + ABS_ERROR(resultPtr[1], + expected[1]); + err3 = + ABS_ERROR(resultPtr[2], + expected[2]); + err4 = + ABS_ERROR(resultPtr[3], + expected[3]); + } + } + + if (!(err1 <= maxErr1) + || !(err2 <= maxErr2) + || !(err3 <= maxErr3) + || !(err4 <= maxErr4)) + { + log_error( + "FAILED norm_offsets: %g , " + "%g , %g:\n", + 
norm_offset_x, + norm_offset_y, + norm_offset_z); + + float tempOut[4]; + shouldReturn |= + determine_validation_error_offset< + float>( + imagePtr, imageInfo, + imageSampler, resultPtr, + expected, error, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, j, + numTries, numClamped, + true, lod); + log_error("Step by step:\n"); + FloatPixel temp = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); + log_error( + "\tulps: %2.2f, %2.2f, " + "%2.2f, %2.2f (max " + "allowed: %2.2f)\n\n", + Ulp_Error(resultPtr[0], + expected[0]), + Ulp_Error(resultPtr[1], + expected[1]), + Ulp_Error(resultPtr[2], + expected[2]), + Ulp_Error(resultPtr[3], + expected[3]), + Ulp_Error( + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24) + + maxErr, + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24))); + } + else + { + log_error( + "Test error: we should " + "have detected this " + "passing above.\n"); + } + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + if (shouldReturn) return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * UINT output type + */ + else if (outputType == kUInt) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for (size_t z = 0, j = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, j++) + { + // Step 1: go through and see if the results verify + // for the pixel For the normalized case on a GPU we + // put in offsets to the X, Y and Z to see if we + // land on the right pixel. This addresses the + // significant inaccuracy in GPU normalization in + // OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; + norm_offset_x <= NORM_OFFSET && !found_pixel + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -NORM_OFFSET; + norm_offset_y <= NORM_OFFSET + && !found_pixel && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -NORM_OFFSET; + norm_offset_z <= NORM_OFFSET + && !found_pixel && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + // If we are not on a GPU, or we are not + // normalized, then only test with + // offsets (0.0, 0.0) E.g., test one + // pixel. + if (!imageSampler->normalized_coords + || gDeviceType != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( + imagePtr, imageInfo, + xOffsetValues[j], yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, lod); + + error = errMax( + errMax(abs_diff_uint(expected[0], + resultPtr[0]), + abs_diff_uint(expected[1], + resultPtr[1])), + errMax( + abs_diff_uint(expected[2], + resultPtr[2]), + abs_diff_uint(expected[3], + resultPtr[3]))); + + if (error < MAX_ERR) found_pixel = 1; + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + + // Step 2: If we did not find a match, then print + // out debugging info. + if (!found_pixel) + { + // For the normalized case on a GPU we put in + // offsets to the X and Y to see if we land on + // the right pixel. This addresses the + // significant inaccuracy in GPU normalization + // in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; + norm_offset_x <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -NORM_OFFSET; + norm_offset_y <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -NORM_OFFSET; + norm_offset_z <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + // If we are not on a GPU, or we are + // not normalized, then only test + // with offsets (0.0, 0.0) E.g., + // test one pixel. + if (!imageSampler->normalized_coords + || gDeviceType + != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset< + unsigned int>( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, lod); + + error = errMax( + errMax( + abs_diff_uint(expected[0], + resultPtr[0]), + abs_diff_uint( + expected[1], + resultPtr[1])), + errMax( + abs_diff_uint(expected[2], + resultPtr[2]), + abs_diff_uint( + expected[3], + resultPtr[3]))); + + if (error > MAX_ERR) + { + log_error( + "FAILED norm_offsets: %g , " + "%g , %g:\n", + norm_offset_x, + norm_offset_y, + norm_offset_z); + shouldReturn |= + determine_validation_error_offset< + unsigned int>( + imagePtr, imageInfo, + imageSampler, resultPtr, + expected, error, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, j, + numTries, numClamped, + false, lod); + } + else + { + log_error( + "Test error: we should " + "have detected this " + "passing above.\n"); + } + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + if (shouldReturn) return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + else + /* + * INT output type + */ + { + // Validate integer 
results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for (size_t z = 0, j = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, j++) + { + // Step 1: go through and see if the results verify + // for the pixel For the normalized case on a GPU we + // put in offsets to the X, Y and Z to see if we + // land on the right pixel. This addresses the + // significant inaccuracy in GPU normalization in + // OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; + norm_offset_x <= NORM_OFFSET && !found_pixel + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -NORM_OFFSET; + norm_offset_y <= NORM_OFFSET + && !found_pixel && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -NORM_OFFSET; + norm_offset_z <= NORM_OFFSET + && !found_pixel && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + // If we are not on a GPU, or we are not + // normalized, then only test with + // offsets (0.0, 0.0) E.g., test one + // pixel. + if (!imageSampler->normalized_coords + || gDeviceType != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( + imagePtr, imageInfo, + xOffsetValues[j], yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, lod); + + error = errMax( + errMax(abs_diff_int(expected[0], + resultPtr[0]), + abs_diff_int(expected[1], + resultPtr[1])), + errMax(abs_diff_int(expected[2], + resultPtr[2]), + abs_diff_int(expected[3], + resultPtr[3]))); + + if (error < MAX_ERR) found_pixel = 1; + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + + // Step 2: If we did not find a match, then print + // out debugging info. 
+ if (!found_pixel) + { + // For the normalized case on a GPU we put in + // offsets to the X and Y to see if we land on + // the right pixel. This addresses the + // significant inaccuracy in GPU normalization + // in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; + norm_offset_x <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -NORM_OFFSET; + norm_offset_y <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -NORM_OFFSET; + norm_offset_z <= NORM_OFFSET + && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + // If we are not on a GPU, or we are + // not normalized, then only test + // with offsets (0.0, 0.0) E.g., + // test one pixel. + if (!imageSampler->normalized_coords + || gDeviceType + != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0 + || NORM_OFFSET == 0 + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], norm_offset_x, + norm_offset_y, norm_offset_z, + imageSampler, expected, lod); + + error = errMax( + errMax( + abs_diff_int(expected[0], + resultPtr[0]), + abs_diff_int(expected[1], + resultPtr[1])), + errMax( + abs_diff_int(expected[2], + resultPtr[2]), + abs_diff_int( + expected[3], + resultPtr[3]))); + + if (error > MAX_ERR) + { + log_error( + "FAILED norm_offsets: %g , " + "%g , %g:\n", + norm_offset_x, + norm_offset_y, + norm_offset_z); + shouldReturn |= + determine_validation_error_offset< + int>( + imagePtr, imageInfo, + imageSampler, resultPtr, + expected, error, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, j, + numTries, numClamped, + false, lod); + } + else + { + log_error( + "Test error: we should " + "have detected this " + 
"passing above.\n"); + } + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + if (shouldReturn) return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + } + { + nextLevelOffset += width_lod * height_lod * depth_lod + * get_pixel_size(imageInfo->format); + width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; + height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; + depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1; + } + } + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr) +{ + // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555, + // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be + // either 0 or 1.) + if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555) + { + cl_ushort *temp = (cl_ushort *)resultPtr; + temp[0] &= 0x7fff; + } +} + +int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors) +{ + // We are allowed 0.6 absolute error vs. 
infinitely precise for some + // normalized formats + if (0 == forceCorrectlyRoundedWrites + && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555 + || imageInfo->format->image_channel_data_type + == CL_UNORM_SHORT_565)) + { + if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f) + && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f)) + return 0; + } + + return 1; +} diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index cb0d54a48c..656c41f47d 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -1,3 +1,18 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// #include "../testBase.h" @@ -10,7 +25,212 @@ #define MAX_CLAMPED 1 extern cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error); +extern void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, + int x, int y, int z, float *outData); extern bool gExtraValidateInfo; extern bool gDisableOffsets; extern bool gUseKernelSamplers; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; +extern uint64_t gRoundingStartValue; +extern bool gPrintOptions; + +extern int test_read_image(cl_context context, cl_command_queue queue, + cl_kernel kernel, image_descriptor *imageInfo, + image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, + MTdata d); + +extern void InitFloatCoordsCommon(image_descriptor *imageInfo, + image_sampler_data *imageSampler, + float *xOffsets, float *yOffsets, + float *zOffsets, float xfract, float yfract, + float zfract, int normalized_coords, MTdata d, + int lod); + +template +int determine_validation_error_offset( + void *imagePtr, image_descriptor *imageInfo, + image_sampler_data *imageSampler, T *resultPtr, T *expected, float error, + float x, float y, float z, float xAddressOffset, float yAddressOffset, + float zAddressOffset, size_t j, int &numTries, int &numClamped, + bool printAsFloat, int lod) +{ + int actualX, actualY, actualZ; + int found = debug_find_pixel_in_image(imagePtr, imageInfo, resultPtr, + &actualX, &actualY, &actualZ, lod); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, clampedY, clampedZ; + + size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, + imageDepth = imageInfo->depth; + + clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset, + zAddressOffset, imageWidth, imageHeight, + imageDepth, imageSampler, imageInfo, + clampedX, clampedY, clampedZ); + + if (found) + { + // Is it a clamping bug? 
+ if (clamped && clampedX == actualX && clampedY == actualY + && clampedZ == actualZ) + { + if ((--numClamped) == 0) + { + if (printAsFloat) + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did " + "not validate! Expected (%g,%g,%g,%g), got " + "(%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[0], + (float)expected[1], (float)expected[2], + (float)expected[3], (float)resultPtr[0], + (float)resultPtr[1], (float)resultPtr[2], + (float)resultPtr[3], error); + } + else + { + log_error( + "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[0], (int)expected[1], + (int)expected[2], (int)expected[3], (int)resultPtr[0], + (int)resultPtr[1], (int)resultPtr[2], + (int)resultPtr[3]); + } + log_error("ERROR: TEST FAILED: Read is erroneously clamping " + "coordinates!\n"); + return -1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if (clamped && !otherClampingBug) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's + // possible we're getting border erroneously + if (resultPtr[0] == 0 && resultPtr[1] == 0 && resultPtr[2] == 0 + && resultPtr[3] == 0) + { + if ((--numClamped) == 0) + { + if (printAsFloat) + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did " + "not validate! Expected (%g,%g,%g,%g), got " + "(%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[0], + (float)expected[1], (float)expected[2], + (float)expected[3], (float)resultPtr[0], + (float)resultPtr[1], (float)resultPtr[2], + (float)resultPtr[3], error); + } + else + { + log_error( + "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate! 
Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[0], (int)expected[1], + (int)expected[2], (int)expected[3], (int)resultPtr[0], + (int)resultPtr[1], (int)resultPtr[2], + (int)resultPtr[3]); + } + log_error("ERROR: TEST FAILED: Clamping is erroneously " + "returning border color!\n"); + return -1; + } + clampingErr = true; + } + } + if (!clampingErr) + { + /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || + (int)y + (int)yOffsetValues[ j ] < 0 ) ) + { + log_error( "NEGATIVE COORDINATE ERROR\n" ); + return -1; + } + */ + if (true) // gExtraValidateInfo ) + { + if (printAsFloat) + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%g,%g,%g,%g),\n\t got " + "(%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[0], + (float)expected[1], (float)expected[2], + (float)expected[3], (float)resultPtr[0], + (float)resultPtr[1], (float)resultPtr[2], + (float)resultPtr[3], error); + } + else + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%x,%x,%x,%x),\n\t got " + "(%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[0], + (int)expected[1], (int)expected[2], (int)expected[3], + (int)resultPtr[0], (int)resultPtr[1], + (int)resultPtr[2], (int)resultPtr[3]); + } + log_error( + "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", + clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, + (int)imageDepth); + + if (printAsFloat && gExtraValidateInfo) + { + log_error("\nNearby values:\n"); + for (int zOff = -1; zOff <= 1; zOff++) + { + for (int yOff = -1; yOff <= 1; yOff++) + { + float top[4], real[4], bot[4]; + read_image_pixel_float(imagePtr, imageInfo, + clampedX - 1, clampedY + yOff, + clampedZ + zOff, top); + read_image_pixel_float(imagePtr, imageInfo, clampedX, + clampedY + yOff, clampedZ + zOff, + real); + read_image_pixel_float(imagePtr, imageInfo, + clampedX + 1, clampedY + yOff, + clampedZ + zOff, bot); + 
log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2], + top[3]); + log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2], + real[3]); + log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2], + bot[3]); + } + } + } + // } + // else + // log_error( "\n" ); + if (imageSampler->filter_mode != CL_FILTER_LINEAR) + { + if (found) + log_error( + "\tValue really found in image at %d,%d,%d (%s)\n", + actualX, actualY, actualZ, + (found > 1) ? "NOT unique!!" : "unique"); + else + log_error("\tValue not actually found in image\n"); + } + log_error("\n"); + } + + numClamped = -1; // We force the clamped counter to never work + if ((--numTries) == 0) return -1; + } + return 0; +} + + +extern int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors); +extern void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr); diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 06c6c9cfe3..3b779fab4f 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017, 2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include @@ -23,10 +25,6 @@ #endif extern bool gTestImage2DFromBuffer; -extern uint64_t gRoundingStartValue; -extern cl_mem_flags gMemFlagsToUse; -extern int gtestTypesToRun; -extern bool gDeviceLt20; // Utility function to clamp down image sizes for certain tests to avoid // using too much memory. 
@@ -76,8 +74,6 @@ static const char *lodOffsetSource = static const char *offsetSource = " int offset = tidY*get_image_width(input) + tidX;\n"; -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, - int x, int y, int z, float *outData ); template int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, T *resultPtr, T * expected, float error, float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 ) @@ -440,7 +436,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl float err1 = ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) ) @@ -490,7 +487,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl imageSampler, expected, 0, &containsDenormals ); float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); if( ! 
(err1 <= maxErr1) ) @@ -604,10 +602,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -677,10 +679,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form float err2 = ABS_ERROR(resultPtr[1], expected[1]); float err3 = ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || @@ -1563,8 +1569,11 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; } -int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - bool floatCoords, ExplicitType outputType ) +int test_read_image_set_2D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, bool floatCoords, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -1664,7 +1673,8 @@ int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_ gTestMipmaps?", lod":" "); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp index b1e0b7e432..795a9eda55 100644 --- a/test_conformance/images/kernel_read_write/test_loops.cpp +++ b/test_conformance/images/kernel_read_write/test_loops.cpp @@ -24,34 +24,34 @@ extern int gtestTypesToRun; extern int test_read_image_set_1D(cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *format, + const cl_image_format *format, image_sampler_data *imageSampler, bool floatCoords, ExplicitType outputType); extern int test_read_image_set_2D(cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *format, + const cl_image_format *format, image_sampler_data *imageSampler, bool floatCoords, ExplicitType outputType); extern int test_read_image_set_3D(cl_device_id device, cl_context context, 
cl_command_queue queue, - cl_image_format *format, + const cl_image_format *format, image_sampler_data *imageSampler, bool floatCoords, ExplicitType outputType); extern int test_read_image_set_1D_array(cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *format, + const cl_image_format *format, image_sampler_data *imageSampler, bool floatCoords, ExplicitType outputType); extern int test_read_image_set_2D_array(cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *format, + const cl_image_format *format, image_sampler_data *imageSampler, bool floatCoords, ExplicitType outputType); int test_read_image_type(cl_device_id device, cl_context context, - cl_command_queue queue, cl_image_format *format, + cl_command_queue queue, const cl_image_format *format, bool floatCoords, image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType) { @@ -164,8 +164,9 @@ int test_read_image_type(cl_device_id device, cl_context context, } int test_read_image_formats(cl_device_id device, cl_context context, - cl_command_queue queue, cl_image_format *formatList, - bool *filterFlags, unsigned int numFormats, + cl_command_queue queue, + const std::vector &formatList, + const std::vector &filterFlags, image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType) @@ -212,11 +213,11 @@ int test_read_image_formats(cl_device_id device, cl_context context, : "integer", get_explicit_type_name(outputType)); - for (unsigned int i = 0; i < numFormats; i++) + for (unsigned int i = 0; i < formatList.size(); i++) { if (filterFlags[i]) continue; - cl_image_format &imageFormat = formatList[i]; + const cl_image_format &imageFormat = formatList[i]; ret |= test_read_image_type(device, context, queue, &imageFormat, @@ -290,11 +291,6 @@ int test_image_set(cl_device_id device, cl_context context, } } - // Grab the list of supported image formats for integer reads - cl_image_format *formatList; - 
bool *filterFlags; - unsigned int numFormats; - // This flag is only for querying the list of supported formats // The flag for creating image will be set explicitly in test functions cl_mem_flags flags; @@ -326,19 +322,9 @@ int test_image_set(cl_device_id device, cl_context context, } } - if (get_format_list(context, imageType, formatList, numFormats, flags)) - return -1; - BufferOwningPtr formatListBuf(formatList); - - - filterFlags = new bool[numFormats]; - if (filterFlags == NULL) - { - log_error("ERROR: Out of memory allocating filter flags list!\n"); - return -1; - } - BufferOwningPtr filterFlagsBuf(filterFlags); - memset(filterFlags, 0, sizeof(bool) * numFormats); + // Grab the list of supported image formats for integer reads + std::vector formatList; + if (get_format_list(context, imageType, formatList, flags)) return -1; // First time through, we'll go ahead and print the formats supported, // regardless of type @@ -348,7 +334,7 @@ int test_image_set(cl_device_id device, cl_context context, { log_info("---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames); - for (unsigned int f = 0; f < numFormats; f++) + for (unsigned int f = 0; f < formatList.size(); f++) { if (IsChannelOrderSupported(formatList[f].image_channel_order) && IsChannelTypeSupported( @@ -369,8 +355,9 @@ int test_image_set(cl_device_id device, cl_context context, { if (gTypesToTest & test.type) { - if (filter_formats(formatList, filterFlags, numFormats, - test.channelTypes, gTestMipmaps) + std::vector filterFlags(formatList.size(), false); + if (filter_formats(formatList, filterFlags, test.channelTypes, + gTestMipmaps) == 0) { log_info("No formats supported for %s type\n", test.name); @@ -379,7 +366,7 @@ int test_image_set(cl_device_id device, cl_context context, { imageSampler.filter_mode = CL_FILTER_NEAREST; ret += formatTestFn(device, context, queue, formatList, - filterFlags, numFormats, &imageSampler, + filterFlags, &imageSampler, 
test.explicitType, imageType); // Linear filtering is only supported with floats @@ -387,7 +374,7 @@ int test_image_set(cl_device_id device, cl_context context, { imageSampler.filter_mode = CL_FILTER_LINEAR; ret += formatTestFn(device, context, queue, formatList, - filterFlags, numFormats, &imageSampler, + filterFlags, &imageSampler, test.explicitType, imageType); } } diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index 3e3b930d9d..68113f9a84 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017, 2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,17 +17,14 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include #include #endif -extern uint64_t gRoundingStartValue; -extern cl_mem_flags gMemFlagsToUse; -extern int gtestTypesToRun; -extern bool gDeviceLt20; - const char *read1DKernelSourcePattern = "__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" "{\n" @@ -56,8 +53,6 @@ const char *float1DKernelSource = static const char *samplerKernelArg = " sampler_t imageSampler,"; -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, - int x, int y, int z, float *outData ); template int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, T *resultPtr, T * expected, float error, float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) @@ -676,10 +671,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke if (err2 > 0 && err2 < formatAbsoluteError) { 
err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -739,10 +738,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || @@ -986,8 +989,11 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; } -int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - bool floatCoords, ExplicitType outputType ) +int test_read_image_set_1D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, bool floatCoords, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -1056,7 +1062,8 @@ int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_ ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); if(error) { exit(1); diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index 44797b1942..ac266ad73d 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017, 2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,17 +16,14 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include #include #endif -extern uint64_t gRoundingStartValue; -extern cl_mem_flags gMemFlagsToUse; -extern int gtestTypesToRun; -extern bool gDeviceLt20; - const char *read1DArrayKernelSourcePattern = "__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" "{\n" @@ -63,12 +60,6 @@ const char *floatKernelSource1DArray = static const char *samplerKernelArg = " sampler_t imageSampler,"; -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, - int x, int y, int z, float *outData ); - -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, - int x, int y, int z, float *outData , int lod); - template int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, T *resultPtr, T * expected, float error, float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) @@ -782,10 +773,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || @@ -848,10 +843,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -1095,8 +1094,11 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; } -int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - bool floatCoords, ExplicitType outputType ) +int test_read_image_set_1D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -1165,7 +1167,8 @@ int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_co gTestMipmaps ? ", lod" : "" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index d424fbdd2a..11b7881462 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017, 2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,9 +16,7 @@ #include "test_common.h" #include -extern cl_mem_flags gMemFlagsToUse; -extern int gtestTypesToRun; -extern bool gDeviceLt20; +#include // Utility function to clamp down image sizes for certain tests to avoid // using too much memory. 
@@ -79,7 +77,6 @@ const char *float2DArrayUnnormalizedCoordKernelSource = static const char *samplerKernelArg = " sampler_t imageSampler,"; -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); template int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, T *resultPtr, T * expected, float error, float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) @@ -622,7 +619,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! (err1 <= maxErr1) ) { @@ -666,7 +664,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! 
(err1 <= maxErr1) ) @@ -947,10 +946,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) { @@ -1009,10 +1012,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) @@ -1302,8 +1309,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; } -int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - bool floatCoords, ExplicitType outputType ) +int test_read_image_set_2D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -1392,7 +1402,8 @@ int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_co gTestMipmaps ? ", lod" : " " ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp index ae8d737de2..860114fba4 100644 --- a/test_conformance/images/kernel_read_write/test_read_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2017, 2021 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,6 @@ #include "test_common.h" #include -extern cl_mem_flags gMemFlagsToUse; -extern int gtestTypesToRun; -extern bool gDeviceLt20; - // Utility function to clamp down image sizes for certain tests to avoid // using too much memory. 
static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) { @@ -77,1090 +73,12 @@ const char *float3DUnnormalizedCoordKernelSource = static const char *samplerKernelArg = " sampler_t imageSampler,"; -extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); -template int determine_validation_error_offset( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, - T *resultPtr, T * expected, float error, - float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) -{ - int actualX, actualY, actualZ; - int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); - bool clampingErr = false, clamped = false, otherClampingBug = false; - int clampedX, clampedY, clampedZ; - - size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->depth; - - clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); - - if( found ) - { - // Is it a clamping bug? - if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ ) - { - if( (--numClamped) == 0 ) - { - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! 
Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" ); - return -1; - } - clampingErr = true; - otherClampingBug = true; - } - } - if( clamped && !otherClampingBug ) - { - // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously - if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) - { - if( (--numClamped) == 0 ) - { - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! 
Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" ); - return -1; - } - clampingErr = true; - } - } - if( !clampingErr ) - { - /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || (int)y + (int)yOffsetValues[ j ] < 0 ) ) - { - log_error( "NEGATIVE COORDINATE ERROR\n" ); - return -1; - } - */ - if( true ) // gExtraValidateInfo ) - { - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t got (%g,%g,%g,%g), error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t got (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth ); - - if( printAsFloat && gExtraValidateInfo ) - { - log_error( "\nNearby values:\n" ); - for( int zOff = -1; zOff <= 1; zOff++ ) - { - for( int yOff = -1; yOff <= 1; yOff++ ) - { - float top[ 4 ], real[ 4 ], bot[ 4 ]; - read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top ); - read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real ); - read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + 
zOff, bot ); - log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); - log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); - log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] ); - } - } - } - // } - // else - // log_error( "\n" ); - if( imageSampler->filter_mode != CL_FILTER_LINEAR ) - { - if( found ) - log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" ); - else - log_error( "\tValue not actually found in image\n" ); - } - log_error( "\n" ); - } - - numClamped = -1; // We force the clamped counter to never work - if( ( --numTries ) == 0 ) - return -1; - } - return 0; -} - -static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod) -{ - size_t i = 0; - if( gDisableOffsets ) - { - for( size_t z = 0; z < imageInfo->depth; z++ ) - { - for( size_t y = 0; y < imageInfo->height; y++ ) - { - for( size_t x = 0; x < imageInfo->width; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) x); - yOffsets[ i ] = (float) (yfract + (double) y); - zOffsets[ i ] = (float) (zfract + (double) z); - } - } - } - } - else - { - for( size_t z = 0; z < imageInfo->depth; z++ ) - { - for( size_t y = 0; y < imageInfo->height; y++ ) - { - for( size_t x = 0; x < imageInfo->width; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); - yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); - zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d ))); - } - } - } - } - - if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) - { - i = 0; - for( size_t z = 0; z < imageInfo->depth; z++ ) - { - for( size_t y = 0; y < imageInfo->height; y++ ) - { - for( size_t x = 0; x < imageInfo->width; x++, i++ ) - { - 
xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0); - yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) imageInfo->height - 1.0); - zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->depth - 1.0); - } - } - } - } - - if( normalized_coords || gTestMipmaps) - { - i = 0; - if (lod == 0) - { - for( size_t z = 0; z < imageInfo->depth; z++ ) - { - for( size_t y = 0; y < imageInfo->height; y++ ) - { - for( size_t x = 0; x < imageInfo->width; x++, i++ ) - { - xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width); - yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height); - zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) imageInfo->depth); - } - } - } - } - else if (gTestMipmaps) - { - size_t width_lod, height_lod, depth_lod; - - width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1; - height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1; - depth_lod = (imageInfo->depth >> lod)?(imageInfo->depth >> lod):1; - - for( size_t z = 0; z < depth_lod; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); - yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod); - zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) depth_lod); - } - } - } - } - } -} - -int test_read_image_3D( cl_context context, cl_command_queue queue, cl_kernel kernel, - image_descriptor *imageInfo, image_sampler_data *imageSampler, - bool useFloatCoords, ExplicitType outputType, MTdata d ) -{ - int error; - size_t threads[3]; - static int initHalf = 0; - - cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; - - clMemWrapper xOffsets, yOffsets, zOffsets, results; - clSamplerWrapper actualSampler; - BufferOwningPtr maxImageUseHostPtrBackingStore; - - // Create offset data - BufferOwningPtr 
xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); - BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); - BufferOwningPtr zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); - - if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) - if( DetectFloatToHalfRoundingMode(queue) ) - return 1; - - BufferOwningPtr imageValues; - generate_random_image_data( imageInfo, imageValues, d ); - - // Construct testing sources - clProtectedImage protImage; - clMemWrapper unprotImage; - cl_mem image; - - if(gtestTypesToRun & kReadTests) - { - image_read_write_flags = CL_MEM_READ_ONLY; - } - else - { - image_read_write_flags = CL_MEM_READ_WRITE; - } - - if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) - { - // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) - // Do not use protected images for max image size test since it rounds the row size to a page size - if (gTestMaxImages) { - generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); - unprotImage = create_image_3d( context, - image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, - imageInfo->width, - imageInfo->height, - imageInfo->depth, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), maxImageUseHostPtrBackingStore, &error ); - } else { - error = protImage.Create(context, image_read_write_flags, - imageInfo->format, imageInfo->width, - imageInfo->height, imageInfo->depth); - } - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - if (gTestMaxImages) - image = (cl_mem)unprotImage; - else - image = (cl_mem)protImage; - } - else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) - { - // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in - unprotImage = create_image_3d( context, - image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, - imageInfo->width, - imageInfo->height, - imageInfo->depth, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0 ), - imageValues, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - else // Either CL_MEM_ALLOC_HOST_PTR or none - { - // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise - // it works just as if no flag is specified, so we just do the same thing either way - if ( !gTestMipmaps ) - { - unprotImage = create_image_3d( context, - image_read_write_flags | gMemFlagsToUse, - imageInfo->format, - imageInfo->width, imageInfo->height, imageInfo->depth, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), - imageValues, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - else - { - cl_image_desc image_desc = {0}; - image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; - image_desc.image_width = imageInfo->width; - image_desc.image_height = imageInfo->height; - image_desc.image_depth = imageInfo->depth; - image_desc.num_mip_levels = imageInfo->num_mip_levels; - - - unprotImage = clCreateImage( context, - image_read_write_flags, - imageInfo->format, &image_desc, NULL, &error); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - } - - if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) - { - size_t origin[ 4 ] = { 0, 0, 0, 0}; - size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth }; - - if( gDebugTrace ) - log_info( " - Writing image...\n" ); - - if ( !gTestMipmaps ) - { - - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? 
imageInfo->slicePitch : 0, - imageValues , 0, NULL, NULL); - - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to 3D image of size %d x %d x %d \n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); - return error; - } - } - else - { - int nextLevelOffset = 0; - - for (int i =0; i < imageInfo->num_mip_levels; i++) - { origin[3] = i; - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, - ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); - return error; - } - nextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format); - //Subsequent mip level dimensions keep halving - region[0] = region[0] >> 1 ? region[0] >> 1 : 1; - region[1] = region[1] >> 1 ? region[1] >> 1 : 1; - region[2] = region[2] >> 1 ? 
region[2] >> 1 : 1; - } - } - } - - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - xOffsetValues, &error); - test_error( error, "Unable to create x offset buffer" ); - yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - yOffsetValues, &error); - test_error( error, "Unable to create y offset buffer" ); - zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - zOffsetValues, &error); - test_error( error, "Unable to create y offset buffer" ); - results = - clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 * imageInfo->width - * imageInfo->height * imageInfo->depth, - NULL, &error); - test_error( error, "Unable to create result buffer" ); - - // Create sampler to use - actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); - test_error(error, "Unable to create image sampler"); - - // Set arguments - int idx = 0; - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); - test_error( error, "Unable to set kernel arguments" ); - if( !gUseKernelSamplers ) - { - error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); - test_error( error, "Unable to set kernel arguments" ); - } - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); - test_error( error, "Unable to set kernel arguments" ); - - const float float_offsets[] = { 0.0f, 
MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; - int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); - int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; - int loopCount = 2 * float_offset_count; - if( ! useFloatCoords ) - loopCount = 1; - if (gTestMaxImages) { - loopCount = 1; - log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); - } - - // Get the maximum absolute error for this format - double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); - if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); - - if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { - initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); - if (initHalf) { - log_info("Half rounding mode successfully detected.\n"); - } - } - - int nextLevelOffset = 0; - size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth; - - //Loop over all mipmap levels, if we are testing mipmapped images. - for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) - { - size_t resultValuesSize = width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4; - BufferOwningPtr resultValues(malloc( resultValuesSize )); - float lod_float = (float)lod; - if (gTestMipmaps) { - //Set the lod kernel arg - if(gDebugTrace) - log_info(" - Working at mip level %d\n", lod); - error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); - test_error( error, "Unable to set kernel arguments" ); - } - - for( int q = 0; q < loopCount; q++ ) - { - float offset = float_offsets[ q % float_offset_count ]; - - // Init the coordinates - InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues, - q>=float_offset_count ? -offset: offset, - q>=float_offset_count ? 
offset: -offset, - q>=float_offset_count ? -offset: offset, - imageSampler->normalized_coords, d, lod ); - - error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, xOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write x offsets" ); - error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, yOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write y offsets" ); - error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, zOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write z offsets" ); - - - memset( resultValues, 0xff, resultValuesSize ); - clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); - - // Figure out thread dimensions - threads[0] = (size_t)width_lod; - threads[1] = (size_t)height_lod; - threads[2] = (size_t)depth_lod; - - // Run the kernel - error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - // Get results - error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); - test_error( error, "Unable to read results from kernel" ); - if( gDebugTrace ) - log_info( " results read\n" ); - - // Validate results element by element - char *imagePtr = (char*)imageValues + nextLevelOffset; - /* - * FLOAT output type - */ - if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - - 
for( size_t z = 0, j = 0; z < depth_lod; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } - if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } - if (err4 > 0 && err4 < formatAbsoluteError) { err4 
= 0.0f; } - float maxErr = 0.5; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, NULL, lod ); - - err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
- checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = ABS_ERROR(resultPtr[3], - expected[3]); - float maxErr = 0.6; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR( - sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR( - sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR( - sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - - float tempOut[4]; - shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, j, - numTries, numClamped, true, lod ); - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - /* - * FLOAT output type - */ - else if( outputType == kFloat ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - - for( size_t z = 0, j = 0; z < depth_lod; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. 
This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(resultPtr[0], expected[0]); - float err2 = - ABS_ERROR(resultPtr[1], expected[1]); - float err3 = - ABS_ERROR(resultPtr[2], expected[2]); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } - if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } - if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
- checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = ABS_ERROR(resultPtr[0], - expected[0]); - float err2 = ABS_ERROR(resultPtr[1], - expected[1]); - float err3 = ABS_ERROR(resultPtr[2], - expected[2]); - float err4 = ABS_ERROR(resultPtr[3], - expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); - - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - - float tempOut[4]; - shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, j, - numTries, numClamped, true, lod ); - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - /* - * UINT output type - */ - else if( outputType == kUInt ) - { - // Validate unsigned integer results - unsigned int *resultPtr = (unsigned int *)(char *)resultValues; - unsigned int expected[4]; - float error; - for( size_t z = 0, j = 0; z < depth_lod; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
- int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error < MAX_ERR) - found_pixel = 1; - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
- checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, - j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - else - /* - * INT output type - */ - { - // Validate integer results - int *resultPtr = (int *)(char *)resultValues; - int expected[4]; - float error; - for( size_t z = 0, j = 0; z < depth_lod; 
z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error < MAX_ERR) - found_pixel = 1; - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
- checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, - j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - } - { - nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format); - width_lod = ( width_lod >> 1) ?( width_lod >> 1) : 1; - 
height_lod = ( height_lod >> 1) ?( height_lod >> 1) : 1; - depth_lod = ( depth_lod >> 1) ?( depth_lod >> 1) : 1; - } - } - - return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} -int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - bool floatCoords, ExplicitType outputType ) +int test_read_image_set_3D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, bool floatCoords, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -1230,7 +148,8 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ gTestMipmaps? ",lod":" "); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests @@ -1250,7 +169,9 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ if( gDebugTrace ) log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); - int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = test_read_image( + context, queue, kernel, &imageInfo, imageSampler, + floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1277,7 +198,9 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); if( gDebugTrace ) log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); - int retCode = test_read_image_3D( context, queue, kernel, 
&imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1291,7 +214,9 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; - int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, imageSampler, + floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1341,7 +266,9 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ if ( gTestMipmaps ) log_info( " and number of mip levels :%d\n", (int)imageInfo.num_mip_levels ); } - int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp index 68b913e945..1556a76a09 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -21,7 +22,6 @@ extern cl_mem_flags gMemFlagsToUse; extern int gtestTypesToRun; -extern bool gDeviceLt20; extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo ); extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo ); @@ -396,6 +396,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que } else { + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -404,21 +406,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -459,6 +448,56 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: @@ -522,7 +561,10 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que return totalErrors; } -int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, 
cl_image_format *format, ExplicitType inputType, MTdata d ) +int test_write_image_1D_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d) { char programSrc[10240]; const char *ptr; @@ -580,7 +622,8 @@ int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command gTestMipmaps ? ", lod" :"" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp index 57bdd546b6..e9aa8d2a6a 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -21,7 +22,6 @@ extern cl_mem_flags gMemFlagsToUse; extern int gtestTypesToRun; -extern bool gDeviceLt20; extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo ); extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); @@ -416,6 +416,9 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 ) { @@ -424,21 +427,8 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -479,6 +469,56 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: @@ -543,7 +583,10 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma } -int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, 
ExplicitType inputType, MTdata d ) +int test_write_image_1D_array_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d) { char programSrc[10240]; const char *ptr; @@ -603,7 +646,8 @@ int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_c gTestMipmaps ? ", lod" :"" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp index 3de467106f..5bca7124e8 100644 --- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -21,7 +22,6 @@ extern cl_mem_flags gMemFlagsToUse; extern int gtestTypesToRun; -extern bool gDeviceLt20; extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo ); extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); @@ -439,6 +439,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -447,21 +450,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -502,6 +493,64 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: @@ -569,7 +618,10 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma } -int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, 
cl_image_format *format, ExplicitType inputType, MTdata d ) +int test_write_image_2D_array_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d) { char programSrc[10240]; const char *ptr; @@ -630,7 +682,8 @@ int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_c gTestMipmaps ? ", lod" : "" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp index c6223d8a29..d9a69627b1 100644 --- a/test_conformance/images/kernel_read_write/test_write_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -21,7 +22,6 @@ extern cl_mem_flags gMemFlagsToUse; extern int gtestTypesToRun; -extern bool gDeviceLt20; extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo ); extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); @@ -446,6 +446,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -454,21 +457,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -509,6 +500,64 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: @@ -577,7 +626,10 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que } -int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, 
cl_image_format *format, ExplicitType inputType, MTdata d ) +int test_write_image_3D_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d) { char programSrc[10240]; const char *ptr; @@ -636,7 +688,8 @@ int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command gTestMipmaps ? ", lod" : "" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp index e848ab4ffb..9cc9698ce2 100644 --- a/test_conformance/images/kernel_read_write/test_write_image.cpp +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -22,12 +23,25 @@ extern bool gTestImage2DFromBuffer; extern cl_mem_flags gMemFlagsToUse; extern int gtestTypesToRun; -extern bool gDeviceLt20; -extern int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d ); -extern int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d ); -extern int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d ); -extern int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d ); +extern int test_write_image_1D_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d); +extern int test_write_image_3D_set(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d); +extern int test_write_image_1D_array_set(cl_device_id device, + cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d); +extern int test_write_image_2D_array_set(cl_device_id device, + cl_context context, + cl_command_queue queue, + const cl_image_format *format, + ExplicitType inputType, MTdata d); extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo ); extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); @@ -464,6 +478,9 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes 
every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -472,21 +489,8 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -564,6 +568,57 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + + 
case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } } float *v = (float *)(char *)imagePtr; @@ -596,7 +651,9 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue } -int test_write_image_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d ) +int test_write_image_set(cl_device_id device, cl_context context, + cl_command_queue queue, const cl_image_format *format, + ExplicitType inputType, MTdata d) { char programSrc[10240]; const char *ptr; @@ -682,7 +739,8 @@ int test_write_image_set( cl_device_id device, cl_context context, cl_command_qu gTestMipmaps ? ", lod" : "" ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); // Run tests @@ -797,8 +855,13 @@ int test_write_image_set( cl_device_id device, cl_context context, cl_command_qu return 0; } -int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, - image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType ) +int test_write_image_formats(cl_device_id device, cl_context context, + cl_command_queue queue, + const std::vector &formatList, + const std::vector &filterFlags, + image_sampler_data *imageSampler, + ExplicitType inputType, + cl_mem_object_type imageType) { if( imageSampler->filter_mode == CL_FILTER_LINEAR ) // No need to run for linear filters @@ -811,9 +874,9 @@ int test_write_image_formats( cl_device_id device, cl_context context, cl_comman RandomSeed seed( gRandomSeed ); - for( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { - cl_image_format &imageFormat = formatList[ i ]; + const cl_image_format &imageFormat = formatList[i]; if( filterFlags[ i ] ) continue; diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp index cb4425924f..cd377793eb 100644 --- a/test_conformance/images/samplerlessReads/main.cpp +++ b/test_conformance/images/samplerlessReads/main.cpp @@ -36,7 +36,6 @@ int gTypesToTest; cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; bool gEnablePitch = false; -bool gDeviceLt20 = false; static void printUsage( const char *execName ); diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp index 11b364f929..55eaaf48c8 100644 --- 
a/test_conformance/images/samplerlessReads/test_iterations.cpp +++ b/test_conformance/images/samplerlessReads/test_iterations.cpp @@ -22,7 +22,6 @@ #include #endif -extern bool gDeviceLt20; extern bool gTestReadWrite; const char *read2DKernelSourcePattern = @@ -177,8 +176,11 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke return 0; } -int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - ExplicitType outputType ) +int test_read_image_set_2D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -254,7 +256,8 @@ int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_ } ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if ( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_loops.cpp b/test_conformance/images/samplerlessReads/test_loops.cpp index 27840ca70c..db49a8f62f 100644 --- a/test_conformance/images/samplerlessReads/test_loops.cpp +++ b/test_conformance/images/samplerlessReads/test_loops.cpp @@ -17,18 +17,44 @@ #include "../common.h" extern int gTypesToTest; -extern bool gDeviceLt20; extern bool gTestReadWrite; -extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); -extern int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); -extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); -extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); -extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); -extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); - -int test_read_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, - image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +extern int 
test_read_image_set_1D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); +extern int test_read_image_set_1D_buffer(cl_device_id device, + cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); +extern int test_read_image_set_2D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); +extern int test_read_image_set_3D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); +extern int test_read_image_set_1D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); +extern int test_read_image_set_2D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType); + +int test_read_image_type(cl_device_id device, cl_context context, + cl_command_queue queue, const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType, cl_mem_object_type imageType) { int ret = 0; imageSampler->addressing_mode = CL_ADDRESS_NONE; @@ -69,20 +95,25 @@ int test_read_image_type( cl_device_id device, cl_context context, cl_command_qu return ret; } -int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, - image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +int test_read_image_formats(cl_device_id device, cl_context context, + cl_command_queue queue, + const std::vector &formatList, + const 
std::vector &filterFlags, + image_sampler_data *imageSampler, + ExplicitType outputType, + cl_mem_object_type imageType) { int ret = 0; imageSampler->normalized_coords = false; log_info( "read_image (%s coords, %s results) *****************************\n", "integer", get_explicit_type_name( outputType ) ); - for ( unsigned int i = 0; i < numFormats; i++ ) + for (unsigned int i = 0; i < formatList.size(); i++) { if ( filterFlags[i] ) continue; - cl_image_format &imageFormat = formatList[ i ]; + const cl_image_format &imageFormat = formatList[i]; ret |= test_read_image_type( device, context, queue, &imageFormat, imageSampler, outputType, imageType ); } @@ -96,39 +127,50 @@ int test_image_set( cl_device_id device, cl_context context, cl_command_queue qu static int printedFormatList = -1; // Grab the list of supported image formats - cl_image_format *formatList; - bool *filterFlags; - unsigned int numFormats; - auto version = get_device_cl_version(device); - if (version < Version(2, 0)) { - gDeviceLt20 = true; - } + std::vector formatList; if (gTestReadWrite && checkForReadWriteImageSupport(device)) { return TEST_SKIPPED_ITSELF; } - // This flag is only for querying the list of supported formats - // The flag for creating image will be set explicitly in test functions - cl_mem_flags flags = (gTestReadWrite)? 
CL_MEM_KERNEL_READ_AND_WRITE : CL_MEM_READ_ONLY; - - if ( get_format_list( context, imageType, formatList, numFormats, flags ) ) + std::vector readOnlyFormats; + if (get_format_list(context, imageType, readOnlyFormats, CL_MEM_READ_ONLY)) return -1; - filterFlags = new bool[ numFormats ]; - if ( filterFlags == NULL ) + if (gTestReadWrite) { - log_error( "ERROR: Out of memory allocating filter flags list!\n" ); - return -1; + std::vector readWriteFormats; + if (get_format_list(context, imageType, readWriteFormats, + CL_MEM_KERNEL_READ_AND_WRITE)) + return -1; + + // Keep only intersecting formats with read only and read write flags + for (unsigned int i = 0; i < readOnlyFormats.size(); i++) + { + for (unsigned int j = 0; j < readWriteFormats.size(); j++) + { + if (readOnlyFormats[i].image_channel_data_type + == readWriteFormats[j].image_channel_data_type + && readOnlyFormats[i].image_channel_order + == readWriteFormats[j].image_channel_order) + { + formatList.push_back(readOnlyFormats[i]); + break; + } + } + } + } + else + { + formatList = readOnlyFormats; } - memset( filterFlags, 0, sizeof( bool ) * numFormats ); // First time through, we'll go ahead and print the formats supported, regardless of type if ( printedFormatList != (int)imageType ) { log_info( "---- Supported %s read formats for this device ---- \n", convert_image_type_to_string(imageType) ); - for ( unsigned int f = 0; f < numFormats; f++ ) + for (unsigned int f = 0; f < formatList.size(); f++) log_info( " %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ), GetChannelTypeName( formatList[ f ].image_channel_data_type ), (int)get_format_channel_count( &formatList[ f ] ) ); @@ -142,9 +184,8 @@ int test_image_set( cl_device_id device, cl_context context, cl_command_queue qu { if (gTypesToTest & test.type) { - if (filter_formats(formatList, filterFlags, numFormats, - test.channelTypes) - == 0) + std::vector filterFlags(formatList.size(), false); + if (filter_formats(formatList, 
filterFlags, test.channelTypes) == 0) { log_info("No formats supported for %s type\n", test.name); } @@ -152,14 +193,11 @@ int test_image_set( cl_device_id device, cl_context context, cl_command_queue qu { imageSampler.filter_mode = CL_FILTER_NEAREST; ret += test_read_image_formats( - device, context, queue, formatList, filterFlags, numFormats, + device, context, queue, formatList, filterFlags, &imageSampler, test.explicitType, imageType); } } } - delete[] filterFlags; - delete[] formatList; - return ret; } diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp index d17fdfcf94..aa261b7ead 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp @@ -22,7 +22,6 @@ #include #endif -extern bool gDeviceLt20; extern bool gTestReadWrite; const char *read1DKernelSourcePattern = @@ -178,8 +177,11 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke return 0; } -int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - ExplicitType outputType ) +int test_read_image_set_1D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -252,7 +254,8 @@ int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_ ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if ( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp index 6a0e1d535e..fb0c26326b 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp @@ -22,7 +22,6 @@ #include #endif -extern bool gDeviceLt20; extern bool gTestReadWrite; const char *read1DArrayKernelSourcePattern = @@ -176,8 +175,11 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker return 0; } -int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - ExplicitType outputType ) +int test_read_image_set_1D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -251,7 +253,8 @@ int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_co ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if ( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp index c21b12c828..7a3084d3ab 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp @@ -22,7 +22,6 @@ #include #endif -extern bool gDeviceLt20; const char *read1DBufferKernelSourcePattern = "__kernel void sample_kernel( read_only image1d_buffer_t inputA, read_only image1d_t inputB, sampler_t sampler, __global int *results )\n" @@ -161,8 +160,11 @@ int test_read_image_1D_buffer( cl_context context, cl_command_queue queue, cl_ke return 0; } -int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, - ExplicitType outputType ) +int test_read_image_set_1D_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -244,7 +246,8 @@ int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_c readFormat ); ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); if ( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp index cfc12725fe..99f24266d4 100644 --- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp @@ -16,7 +16,6 @@ #include "../testBase.h" #include -extern bool gDeviceLt20; extern bool gTestReadWrite; const char *read2DArrayKernelSourcePattern = @@ -162,8 +161,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker return 0; } -int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, - image_sampler_data *imageSampler, ExplicitType outputType ) +int test_read_image_set_2D_array(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -241,7 +243,8 @@ int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_co ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp index da5466f77e..cf4114074f 100644 --- a/test_conformance/images/samplerlessReads/test_read_3D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp @@ -16,7 +16,6 @@ #include "../testBase.h" #include -extern bool gDeviceLt20; extern bool gTestReadWrite; const char *read3DKernelSourcePattern = @@ -165,8 +164,11 @@ int test_read_image_3D( cl_context context, cl_command_queue queue, cl_kernel ke return 0; } -int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, - image_sampler_data *imageSampler, ExplicitType outputType ) +int test_read_image_set_3D(cl_device_id device, cl_context context, + cl_command_queue queue, + const cl_image_format *format, + image_sampler_data *imageSampler, + ExplicitType outputType) { char programSrc[10240]; const char *ptr; @@ -244,7 +246,8 @@ int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_ ptr = programSrc; - error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? 
"" : "-cl-std=CL2.0" ); + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_kernel"); test_error( error, "Unable to create testing kernel" ); diff --git a/test_conformance/images/testBase.h b/test_conformance/images/testBase.h index 7c45fdd109..ad48f10d5c 100644 --- a/test_conformance/images/testBase.h +++ b/test_conformance/images/testBase.h @@ -64,19 +64,22 @@ enum TestTypes kAllTests = ( kReadTests | kWriteTests | kReadWriteTests ) }; -typedef int (*test_format_set_fn)( cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, - image_sampler_data *imageSampler, ExplicitType outputType, - cl_mem_object_type imageType ); +typedef int (*test_format_set_fn)( + cl_device_id device, cl_context context, cl_command_queue queue, + const std::vector &formatList, + const std::vector &filterFlags, image_sampler_data *imageSampler, + ExplicitType outputType, cl_mem_object_type imageType); -extern int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, - image_sampler_data *imageSampler, ExplicitType outputType, - cl_mem_object_type imageType ); -extern int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, - cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, - image_sampler_data *imageSampler, ExplicitType outputType, - cl_mem_object_type imageType ); +extern int test_read_image_formats( + cl_device_id device, cl_context context, cl_command_queue queue, + const std::vector &formatList, + const std::vector &filterFlags, image_sampler_data *imageSampler, + ExplicitType outputType, cl_mem_object_type imageType); +extern int test_write_image_formats( + cl_device_id device, cl_context context, cl_command_queue queue, + const std::vector &formatList, + const std::vector &filterFlags, 
image_sampler_data *imageSampler, + ExplicitType outputType, cl_mem_object_type imageType); #endif // _testBase_h diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt index a045ef81c1..5344eabc07 100644 --- a/test_conformance/integer_ops/CMakeLists.txt +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES test_unary_ops.cpp verification_and_generation_functions.cpp test_popcount.cpp + test_integer_dot_product.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp index 00e91661c3..e57cffd949 100644 --- a/test_conformance/integer_ops/main.cpp +++ b/test_conformance/integer_ops/main.cpp @@ -25,127 +25,129 @@ #endif test_definition test_list[] = { - ADD_TEST( integer_clz ), - ADD_TEST_VERSION( integer_ctz, Version(2, 0)), - ADD_TEST( integer_hadd ), - ADD_TEST( integer_rhadd ), - ADD_TEST( integer_mul_hi ), - ADD_TEST( integer_rotate ), - ADD_TEST( integer_clamp ), - ADD_TEST( integer_mad_sat ), - ADD_TEST( integer_mad_hi ), - ADD_TEST( integer_min ), - ADD_TEST( integer_max ), - ADD_TEST( integer_upsample ), - - ADD_TEST( integer_abs ), - ADD_TEST( integer_abs_diff ), - ADD_TEST( integer_add_sat ), - ADD_TEST( integer_sub_sat ), - - ADD_TEST( integer_addAssign ), - ADD_TEST( integer_subtractAssign ), - ADD_TEST( integer_multiplyAssign ), - ADD_TEST( integer_divideAssign ), - ADD_TEST( integer_moduloAssign ), - ADD_TEST( integer_andAssign ), - ADD_TEST( integer_orAssign ), - ADD_TEST( integer_exclusiveOrAssign ), - - ADD_TEST( unary_ops_increment ), - ADD_TEST( unary_ops_decrement ), - ADD_TEST( unary_ops_full ), - - ADD_TEST( integer_mul24 ), - ADD_TEST( integer_mad24 ), - - ADD_TEST( long_math ), - ADD_TEST( long_logic ), - ADD_TEST( long_shift ), - ADD_TEST( long_compare ), - - ADD_TEST( ulong_math ), - ADD_TEST( ulong_logic ), - ADD_TEST( ulong_shift ), - ADD_TEST( ulong_compare ), - - ADD_TEST( int_math 
), - ADD_TEST( int_logic ), - ADD_TEST( int_shift ), - ADD_TEST( int_compare ), - - ADD_TEST( uint_math ), - ADD_TEST( uint_logic ), - ADD_TEST( uint_shift ), - ADD_TEST( uint_compare ), - - ADD_TEST( short_math ), - ADD_TEST( short_logic ), - ADD_TEST( short_shift ), - ADD_TEST( short_compare ), - - ADD_TEST( ushort_math ), - ADD_TEST( ushort_logic ), - ADD_TEST( ushort_shift ), - ADD_TEST( ushort_compare ), - - ADD_TEST( char_math ), - ADD_TEST( char_logic ), - ADD_TEST( char_shift ), - ADD_TEST( char_compare ), - - ADD_TEST( uchar_math ), - ADD_TEST( uchar_logic ), - ADD_TEST( uchar_shift ), - ADD_TEST( uchar_compare ), - - ADD_TEST( popcount ), + ADD_TEST(integer_clz), + ADD_TEST_VERSION(integer_ctz, Version(2, 0)), + ADD_TEST(integer_hadd), + ADD_TEST(integer_rhadd), + ADD_TEST(integer_mul_hi), + ADD_TEST(integer_rotate), + ADD_TEST(integer_clamp), + ADD_TEST(integer_mad_sat), + ADD_TEST(integer_mad_hi), + ADD_TEST(integer_min), + ADD_TEST(integer_max), + ADD_TEST(integer_upsample), + + ADD_TEST(integer_abs), + ADD_TEST(integer_abs_diff), + ADD_TEST(integer_add_sat), + ADD_TEST(integer_sub_sat), + + ADD_TEST(integer_addAssign), + ADD_TEST(integer_subtractAssign), + ADD_TEST(integer_multiplyAssign), + ADD_TEST(integer_divideAssign), + ADD_TEST(integer_moduloAssign), + ADD_TEST(integer_andAssign), + ADD_TEST(integer_orAssign), + ADD_TEST(integer_exclusiveOrAssign), + + ADD_TEST(unary_ops_increment), + ADD_TEST(unary_ops_decrement), + ADD_TEST(unary_ops_full), + + ADD_TEST(integer_mul24), + ADD_TEST(integer_mad24), + + ADD_TEST(long_math), + ADD_TEST(long_logic), + ADD_TEST(long_shift), + ADD_TEST(long_compare), + + ADD_TEST(ulong_math), + ADD_TEST(ulong_logic), + ADD_TEST(ulong_shift), + ADD_TEST(ulong_compare), + + ADD_TEST(int_math), + ADD_TEST(int_logic), + ADD_TEST(int_shift), + ADD_TEST(int_compare), + + ADD_TEST(uint_math), + ADD_TEST(uint_logic), + ADD_TEST(uint_shift), + ADD_TEST(uint_compare), + + ADD_TEST(short_math), + ADD_TEST(short_logic), + 
ADD_TEST(short_shift), + ADD_TEST(short_compare), + + ADD_TEST(ushort_math), + ADD_TEST(ushort_logic), + ADD_TEST(ushort_shift), + ADD_TEST(ushort_compare), + + ADD_TEST(char_math), + ADD_TEST(char_logic), + ADD_TEST(char_shift), + ADD_TEST(char_compare), + + ADD_TEST(uchar_math), + ADD_TEST(uchar_logic), + ADD_TEST(uchar_shift), + ADD_TEST(uchar_compare), + + ADD_TEST(popcount), // Quick - ADD_TEST( quick_long_math ), - ADD_TEST( quick_long_logic ), - ADD_TEST( quick_long_shift ), - ADD_TEST( quick_long_compare ), - - ADD_TEST( quick_ulong_math ), - ADD_TEST( quick_ulong_logic ), - ADD_TEST( quick_ulong_shift ), - ADD_TEST( quick_ulong_compare ), - - ADD_TEST( quick_int_math ), - ADD_TEST( quick_int_logic ), - ADD_TEST( quick_int_shift ), - ADD_TEST( quick_int_compare ), - - ADD_TEST( quick_uint_math ), - ADD_TEST( quick_uint_logic ), - ADD_TEST( quick_uint_shift ), - ADD_TEST( quick_uint_compare ), - - ADD_TEST( quick_short_math ), - ADD_TEST( quick_short_logic ), - ADD_TEST( quick_short_shift ), - ADD_TEST( quick_short_compare ), - - ADD_TEST( quick_ushort_math ), - ADD_TEST( quick_ushort_logic ), - ADD_TEST( quick_ushort_shift ), - ADD_TEST( quick_ushort_compare ), - - ADD_TEST( quick_char_math ), - ADD_TEST( quick_char_logic ), - ADD_TEST( quick_char_shift ), - ADD_TEST( quick_char_compare ), - - ADD_TEST( quick_uchar_math ), - ADD_TEST( quick_uchar_logic ), - ADD_TEST( quick_uchar_shift ), - ADD_TEST( quick_uchar_compare ), - - ADD_TEST( vector_scalar ), + ADD_TEST(quick_long_math), + ADD_TEST(quick_long_logic), + ADD_TEST(quick_long_shift), + ADD_TEST(quick_long_compare), + + ADD_TEST(quick_ulong_math), + ADD_TEST(quick_ulong_logic), + ADD_TEST(quick_ulong_shift), + ADD_TEST(quick_ulong_compare), + + ADD_TEST(quick_int_math), + ADD_TEST(quick_int_logic), + ADD_TEST(quick_int_shift), + ADD_TEST(quick_int_compare), + + ADD_TEST(quick_uint_math), + ADD_TEST(quick_uint_logic), + ADD_TEST(quick_uint_shift), + ADD_TEST(quick_uint_compare), + + 
ADD_TEST(quick_short_math), + ADD_TEST(quick_short_logic), + ADD_TEST(quick_short_shift), + ADD_TEST(quick_short_compare), + + ADD_TEST(quick_ushort_math), + ADD_TEST(quick_ushort_logic), + ADD_TEST(quick_ushort_shift), + ADD_TEST(quick_ushort_compare), + + ADD_TEST(quick_char_math), + ADD_TEST(quick_char_logic), + ADD_TEST(quick_char_shift), + ADD_TEST(quick_char_compare), + + ADD_TEST(quick_uchar_math), + ADD_TEST(quick_uchar_logic), + ADD_TEST(quick_uchar_shift), + ADD_TEST(quick_uchar_compare), + + ADD_TEST(vector_scalar), + + ADD_TEST(integer_dot_product), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) { diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h index d5b77e704b..82311fb9cc 100644 --- a/test_conformance/integer_ops/procs.h +++ b/test_conformance/integer_ops/procs.h @@ -141,3 +141,5 @@ extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, c extern int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/integer_ops/test_add_sat.cpp b/test_conformance/integer_ops/test_add_sat.cpp index c0e45d114c..e33f5c672d 100644 --- a/test_conformance/integer_ops/test_add_sat.cpp +++ b/test_conformance/integer_ops/test_add_sat.cpp @@ -21,18 +21,9 @@ #include #include -#include "procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 +#include -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? 
(_a) : (_b) ) -#endif +#include "procs.h" static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -40,8 +31,8 @@ static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -55,9 +46,9 @@ static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (int) inA[i] + (int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -69,8 +60,8 @@ static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -84,8 +75,8 @@ static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (ushort%s) 
0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp new file mode 100644 index 0000000000..602d59b628 --- /dev/null +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -0,0 +1,442 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "harness/integer_ops_test_info.h" +#include "harness/testHarness.h" + +template +static void +calculate_reference(std::vector& ref, const std::vector& a, + const std::vector& b, const bool AccSat = false, + const std::vector& acc = {}) +{ + assert(a.size() == b.size()); + assert(AccSat == false || acc.size() == a.size() / N); + + ref.resize(a.size() / N); + for (size_t r = 0; r < ref.size(); r++) + { + cl_long result = AccSat ? acc[r] : 0; + for (size_t c = 0; c < N; c++) + { + // OK to assume no overflow? 
+ result += a[r * N + c] * b[r * N + c]; + } + if (AccSat && result > std::numeric_limits::max()) + { + result = std::numeric_limits::max(); + } + ref[r] = static_cast(result); + } +} + +template +void generate_inputs_with_special_values(std::vector& a, + std::vector& b) +{ + const std::vector specialValuesA( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + const std::vector specialValuesB( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + + size_t count = 0; + for (auto svA : specialValuesA) + { + for (auto svB : specialValuesB) + { + a[count] = svA; + b[count] = svB; + ++count; + } + } + + // Generate random data for the rest of the inputs: + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, a.size() - count, d, + a.data() + count); + generate_random_data(TestInfo::explicitType, b.size() - count, d, + b.data() + count); +} + +template +void generate_acc_sat_inputs(std::vector& acc) +{ + // First generate random data: + fill_vector_with_random_data(acc); + + // Now go through the generated data, and make every other element large. + // This ensures we have some elements that need saturation. 
+ for (size_t i = 0; i < acc.size(); i += 2) + { + acc[i] = std::numeric_limits::max() - acc[i]; + } +} + +template struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "UNSUPPORTED"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "int"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "uint"; +}; + +static constexpr const char* kernel_source_dot = R"CLC( +__kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b) +{ + int index = get_global_id(0); + dst[index] = DOT(a[index], b[index]); +} +)CLC"; + +static constexpr const char* kernel_source_dot_acc_sat = R"CLC( +__kernel void test_dot_acc_sat( + __global DSTTYPE* dst, + __global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc) +{ + int index = get_global_id(0); + dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]); +} +)CLC"; + +template +static int test_case_dot(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, bool packed, + bool sat) +{ + log_info(" testing %s = dot%s%s(%s, %s)\n", + std::numeric_limits::is_signed ? "signed" : "unsigned", + sat ? "_acc_sat" : "", packed ? "_packed" : "", + std::numeric_limits::is_signed ? "signed" : "unsigned", + std::numeric_limits::is_signed ? "signed" : "unsigned"); + + cl_int error = CL_SUCCESS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string buildOptions; + buildOptions += " -DDSTTYPE="; + buildOptions += TestInfo::deviceTypeName; + buildOptions += " -DSRCTYPEA="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + buildOptions += " -DSRCTYPEB="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + std::string packedSuffix; + packedSuffix += std::numeric_limits::is_signed ? "s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? 
"s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? "_int" : "_uint"; + if (sat) + { + buildOptions += packed + ? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix + : " -DDOT_ACC_SAT=dot_acc_sat"; + } + else + { + buildOptions += + packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot"; + } + + std::vector a(N * num_elements); + std::vector b(N * num_elements); + generate_inputs_with_special_values(a, b); + + std::vector acc; + if (sat) + { + acc.resize(num_elements); + generate_acc_sat_inputs(acc); + } + + std::vector reference(num_elements); + calculate_reference(reference, a, b, sat, acc); + + const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot; + const char* name = sat ? "test_dot_acc_sat" : "test_dot"; + error = create_single_kernel_helper(context, &program, &kernel, 1, &source, + name, buildOptions.c_str()); + test_error(error, "Unable to create test kernel"); + + clMemWrapper dst = clCreateBuffer( + context, 0, reference.size() * sizeof(DstType), NULL, &error); + test_error(error, "Unable to create output buffer"); + + clMemWrapper srcA = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + a.size() * sizeof(SrcTypeA), a.data(), &error); + test_error(error, "Unable to create srcA buffer"); + + clMemWrapper srcB = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + b.size() * sizeof(SrcTypeB), b.data(), &error); + test_error(error, "Unable to create srcB buffer"); + + clMemWrapper srcAcc; + if (sat) + { + srcAcc = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + acc.size() * sizeof(DstType), acc.data(), &error); + test_error(error, "Unable to create acc buffer"); + } + + error = clSetKernelArg(kernel, 0, sizeof(dst), &dst); + test_error(error, "Unable to set output buffer kernel arg"); + + error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA); + test_error(error, "Unable to set srcA buffer kernel arg"); + + error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB); + test_error(error, "Unable to set srcB buffer kernel 
arg"); + + if (sat) + { + error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc); + test_error(error, "Unable to set acc buffer kernel arg"); + } + + size_t global_work_size[] = { reference.size() }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + NULL, 0, NULL, NULL); + test_error(error, "Unable to enqueue test kernel"); + + error = clFinish(queue); + test_error(error, "clFinish failed after test kernel"); + + std::vector results(reference.size(), 99); + error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, + results.size() * sizeof(DstType), + results.data(), 0, NULL, NULL); + test_error(error, "Unable to read data after test kernel"); + + if (results != reference) + { + log_error("Result buffer did not match reference buffer!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +template +static int test_vectype(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + + // dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + + return result; +} + +template +static int test_vectype_packed(cl_device_id deviceID, 
cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // packed dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + + // packed dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + + return result; +} + +int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + log_info("cl_khr_integer_dot_product is not supported\n"); + return TEST_SKIPPED_ITSELF; + } + + Version deviceVersion = get_device_cl_version(deviceID); + cl_version extensionVersion; + + if ((deviceVersion >= Version(3, 0)) + || is_extension_available(deviceID, "cl_khr_extended_versioning")) + { + extensionVersion = + get_extension_version(deviceID, "cl_khr_integer_dot_product"); + } + else + { + // Assume 1.0.0 is supported if the version can't be queried + extensionVersion = CL_MAKE_VERSION(1, 0, 0); + } + + cl_int error = CL_SUCCESS; + int result = TEST_PASS; + + cl_device_integer_dot_product_capabilities_khr dotCaps = 0; + error = clGetDeviceInfo(deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + sizeof(dotCaps), 
&dotCaps, NULL); + test_error( + error, + "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR"); + + // Check that the required capabilities are reported + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR, + "When cl_khr_integer_dot_product is supported " + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be " + "supported"); + + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR, + "When cl_khr_integer_dot_product is supported with version >= 2.0.0" + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR must be " + "supported"); + } + + // Check that acceleration properties can be queried + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + size_t size_ret; + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 0, + nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR"); + + cl_device_integer_dot_product_acceleration_properties_khr + accelerationProperties; + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, "Unable to query 8-bit acceleration properties"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + 0, nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_" + "PACKED_KHR"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, + "Unable to query 4x8-bit packed acceleration properties"); + } + + // Report when 
unknown capabilities are found + if (dotCaps + & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR + | CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) + { + log_info("NOTE: found an unknown / untested capability!\n"); + } + + // Test built-in functions + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) + { + result |= test_vectype(deviceID, context, queue, + num_elements); + } + + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR) + { + result |= test_vectype_packed( + deviceID, context, queue, num_elements); + } + + return result; +} diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp index 8d77b24bec..6fa18e1e0b 100644 --- a/test_conformance/integer_ops/test_integers.cpp +++ b/test_conformance/integer_ops/test_integers.cpp @@ -16,14 +16,9 @@ #include "testBase.h" #include "harness/conversions.h" -#define TEST_SIZE 512 +#include -#ifndef MIN - #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b)) -#endif -#ifndef MAX - #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) -#endif +#define TEST_SIZE 512 const char *singleParamIntegerKernelSourcePattern = "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" @@ -1512,19 +1507,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kULong: - ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_ulong *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uint *)destination)[0] = + (cl_uint)(std::max(std::min(valueA, valueC), valueB)); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_ushort *)destination)[0] = + (cl_ushort)(std::max(std::min(valueA, valueC), valueB)); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uchar *)destination)[0] = + (cl_uchar)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1576,19 +1572,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kLong: - ((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_long *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kInt: - ((cl_int*) destination)[0] = (cl_int) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_int *)destination)[0] = + (cl_int)(std::max(std::min(valueA, valueC), valueB)); break; case kShort: - ((cl_short*) destination)[0] = (cl_short) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_short *)destination)[0] = + (cl_short)(std::max(std::min(valueA, valueC), valueB)); break; case kChar: - ((cl_char*) destination)[0] = (cl_char) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_char *)destination)[0] = + (cl_char)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1654,13 
+1651,16 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_ulong*) destination)[0] = multLo; break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX ); + ((cl_uint *)destination)[0] = + (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX ); + ((cl_ushort *)destination)[0] = + (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX ); + ((cl_uchar *)destination)[0] = + (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX); break; default: //error -- should never get here @@ -1744,18 +1744,18 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_long*) destination)[0] = result; break; case kInt: - result = MIN( result, (cl_long) CL_INT_MAX ); - result = MAX( result, (cl_long) CL_INT_MIN ); + result = std::min(result, (cl_long)CL_INT_MAX); + result = std::max(result, (cl_long)CL_INT_MIN); ((cl_int*) destination)[0] = (cl_int) result; break; case kShort: - result = MIN( result, (cl_long) CL_SHRT_MAX ); - result = MAX( result, (cl_long) CL_SHRT_MIN ); + result = std::min(result, (cl_long)CL_SHRT_MAX); + result = std::max(result, (cl_long)CL_SHRT_MIN); ((cl_short*) destination)[0] = (cl_short) result; break; case kChar: - result = MIN( result, (cl_long) CL_CHAR_MAX ); - result = MAX( result, (cl_long) CL_CHAR_MIN ); + result = std::min(result, (cl_long)CL_CHAR_MAX); + result = std::max(result, (cl_long)CL_CHAR_MIN); ((cl_char*) destination)[0] = (cl_char) result; break; default: diff --git a/test_conformance/integer_ops/test_sub_sat.cpp b/test_conformance/integer_ops/test_sub_sat.cpp index 845d106402..2a88ee0df7 100644 --- a/test_conformance/integer_ops/test_sub_sat.cpp +++ b/test_conformance/integer_ops/test_sub_sat.cpp @@ -21,19 +21,9 @@ #include #include 
-#include "procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 - -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) -#endif +#include +#include "procs.h" static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -41,8 +31,8 @@ static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -56,9 +46,9 @@ static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -70,8 +60,8 @@ static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -85,8 +75,8 @@ static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ 
) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp index 72940eaa83..c91c85aeb4 100644 --- a/test_conformance/integer_ops/test_unary_ops.cpp +++ b/test_conformance/integer_ops/test_unary_ops.cpp @@ -107,7 +107,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which // For sub ops, the min control value is 2. Otherwise, it's 0 controlData[ i ] |= 0x02; else if( whichOp == kIncrement ) - // For addition ops, the MAX control value is 1. Otherwise, it's 3 + // For addition ops, the max control value is 1. Otherwise, it's 3 controlData[ i ] &= ~0x02; } streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 8818039420..28d2716f85 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -1,35 +1,43 @@ set(MODULE_NAME BRUTEFORCE) set(${MODULE_NAME}_SOURCES - FunctionList.cpp - Sleep.cpp - binary.cpp - binaryOperator.cpp - Utility.cpp - binary_i.cpp - binary_two_results_i.cpp - i_unary.cpp - macro_binary.cpp - macro_unary.cpp - mad.cpp + binary_double.cpp + binary_float.cpp + binary_i_double.cpp + binary_i_float.cpp + binary_operator_double.cpp + binary_operator_float.cpp + binary_two_results_i_double.cpp + binary_two_results_i_float.cpp + common.h + function_list.cpp + function_list.h + i_unary_double.cpp + i_unary_float.cpp + macro_binary_double.cpp + macro_binary_float.cpp + macro_unary_double.cpp + macro_unary_float.cpp + mad_double.cpp + mad_float.cpp 
main.cpp reference_math.cpp - ternary.cpp - unary.cpp - unary_two_results.cpp - unary_two_results_i.cpp - unary_u.cpp + reference_math.h + sleep.cpp + sleep.h + ternary_double.cpp + ternary_float.cpp + test_functions.h + unary_double.cpp + unary_float.cpp + unary_two_results_double.cpp + unary_two_results_float.cpp + unary_two_results_i_double.cpp + unary_two_results_i_float.cpp + unary_u_double.cpp + unary_u_float.cpp + utility.cpp + utility.h ) -if (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) -set_source_files_properties( -${MODULE_NAME}_SOURCES - COMPILE_FLAGS -march=i686) -endif (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) - -if(CMAKE_COMPILER_IS_GNUCC) -set_source_files_properties( - COMPILE_FLAGS -O0) -endif(CMAKE_COMPILER_IS_GNUCC) - include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h deleted file mode 100644 index c22bceebba..0000000000 --- a/test_conformance/math_brute_force/FunctionList.h +++ /dev/null @@ -1,101 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef FUNCTIONLIST_H -#define FUNCTIONLIST_H - -#include "harness/compat.h" - -#ifndef WIN32 -#include -#endif - -#if defined( __APPLE__ ) - #include -#else - #include -#endif - -#include "harness/mt19937.h" - -typedef union fptr -{ - void *p; - double (*f_f)(double); - double (*f_u)(cl_uint); - int (*i_f)(double); - int (*i_f_f)(float); - float (*f_ff_f)(float, float); - double (*f_ff)(double, double); - int (*i_ff)(double, double); - double (*f_fi)(double, int); - double (*f_fpf)(double, double*); - double (*f_fpI)(double, int*); - double (*f_ffpI)(double, double, int*); - double (*f_fff)(double, double, double ); - float (*f_fma)(float, float, float, int); -}fptr; - -typedef union dptr -{ - void *p; - long double (*f_f)(long double); - long double (*f_u)(cl_ulong); - int (*i_f)(long double); - long double (*f_ff)(long double, long double); - int (*i_ff)(long double, long double); - long double (*f_fi)(long double, int); - long double (*f_fpf)(long double, long double*); - long double (*f_fpI)(long double, int*); - long double (*f_ffpI)(long double, long double, int*); - long double (*f_fff)(long double, long double, long double); -}dptr; - -struct Func; - -typedef struct vtbl -{ - const char *type_name; - int (*TestFunc)(const struct Func *, MTdata, bool); - int (*DoubleTestFunc)( - const struct Func *, MTdata, - bool); // may be NULL if function is single precision only -}vtbl; - -typedef struct Func -{ - const char *name; // common name, to be used as an argument in the shell - const char *nameInCode; // name as it appears in the __kernel, usually the same as name, but different for multiplication - fptr func; - dptr dfunc; - fptr rfunc; - float float_ulps; - float double_ulps; - float float_embedded_ulps; - float relaxed_error; - float relaxed_embedded_error; - int ftz; - int relaxed; - const vtbl *vtbl_ptr; -}Func; - - -extern const Func functionList[]; - -extern const size_t functionListCount; - -#endif - - diff --git 
a/test_conformance/math_brute_force/Sleep.cpp b/test_conformance/math_brute_force/Sleep.cpp deleted file mode 100644 index 4d3b2c64b5..0000000000 --- a/test_conformance/math_brute_force/Sleep.cpp +++ /dev/null @@ -1,118 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Sleep.h" -#include "Utility.h" - -#if defined( __APPLE__ ) - #include - #include - - struct - { - io_connect_t connection; - IONotificationPortRef port; - io_object_t iterator; - }sleepInfo; - - void sleepCallback( void * refcon, - io_service_t service, - natural_t messageType, - void * messageArgument ); - - void sleepCallback( void * refcon UNUSED, - io_service_t service UNUSED, - natural_t messageType, - void * messageArgument ) - { - - IOReturn result; - /* - service -- The IOService whose state has changed. - messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family. - messageArgument -- An argument for the message, dependent on the messageType. - */ - switch ( messageType ) - { - case kIOMessageSystemWillSleep: - // Handle demand sleep (such as sleep caused by running out of - // batteries, closing the lid of a laptop, or selecting - // sleep from the Apple menu. 
- IOAllowPowerChange(sleepInfo.connection,(long)messageArgument); - vlog( "Hard sleep occurred.\n" ); - break; - case kIOMessageCanSystemSleep: - // In this case, the computer has been idle for several minutes - // and will sleep soon so you must either allow or cancel - // this notification. Important: if you don’t respond, there will - // be a 30-second timeout before the computer sleeps. - // IOCancelPowerChange(root_port,(long)messageArgument); - result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument); - if( kIOReturnSuccess != result ) - vlog( "sleep prevention failed. (%d)\n", result); - break; - case kIOMessageSystemHasPoweredOn: - // Handle wakeup. - break; - } - } -#endif - - - - - -void PreventSleep( void ) -{ -#if defined( __APPLE__ ) - vlog( "Disabling sleep... " ); - sleepInfo.iterator = (io_object_t) 0; - sleepInfo.port = NULL; - sleepInfo.connection = IORegisterForSystemPower - ( - &sleepInfo, //void * refcon, - &sleepInfo.port, //IONotificationPortRef * thePortRef, - sleepCallback, //IOServiceInterestCallback callback, - &sleepInfo.iterator //io_object_t * notifier - ); - - if( (io_connect_t) 0 == sleepInfo.connection ) - vlog( "failed.\n" ); - else - vlog( "done.\n" ); - - CFRunLoopAddSource(CFRunLoopGetCurrent(), - IONotificationPortGetRunLoopSource(sleepInfo.port), - kCFRunLoopDefaultMode); -#else - vlog( "*** PreventSleep() is not implemented on this platform.\n" ); -#endif -} - -void ResumeSleep( void ) -{ -#if defined( __APPLE__ ) - IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator ); - if( 0 != result ) - vlog( "Got error %d restoring sleep \n", result ); - else - vlog( "Sleep restored.\n" ); -#else - vlog( "*** ResumeSleep() is not implemented on this platform.\n" ); -#endif -} - - - diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h deleted file mode 100644 index 31256358b3..0000000000 --- a/test_conformance/math_brute_force/Utility.h +++ /dev/null @@ 
-1,233 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef UTILITY_H -#define UTILITY_H - -#include "harness/compat.h" - -#ifdef __APPLE__ -#include -#else -#include -#endif -#include -#include "harness/rounding_mode.h" -#include "harness/fpcontrol.h" -#include "harness/testHarness.h" -#include "harness/ThreadPool.h" -#include "harness/conversions.h" - -#define BUFFER_SIZE (1024*1024*2) - -#if defined( __GNUC__ ) - #define UNUSED __attribute__ ((unused)) -#else - #define UNUSED -#endif - -struct Func; - -extern int gWimpyBufferSize; -extern int gWimpyReductionFactor; - -#define VECTOR_SIZE_COUNT 6 -extern const char *sizeNames[VECTOR_SIZE_COUNT]; -extern const int sizeValues[VECTOR_SIZE_COUNT]; - -extern cl_device_id gDevice; -extern cl_context gContext; -extern cl_command_queue gQueue; -extern void *gIn; -extern void *gIn2; -extern void *gIn3; -extern void *gOut_Ref; -extern void *gOut_Ref2; -extern void *gOut[VECTOR_SIZE_COUNT]; -extern void *gOut2[VECTOR_SIZE_COUNT]; -extern cl_mem gInBuffer; -extern cl_mem gInBuffer2; -extern cl_mem gInBuffer3; -extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT]; -extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; -extern uint32_t gComputeDevices; -extern uint32_t gSimdSize; -extern int gSkipCorrectnessTesting; -extern int gMeasureTimes; -extern int gReportAverageTimes; -extern int gForceFTZ; -extern int gFastRelaxedDerived; -extern int gWimpyMode; -extern int 
gHasDouble; -extern int gIsInRTZMode; -extern int gInfNanSupport; -extern int gIsEmbedded; -extern int gVerboseBruteForce; -extern uint32_t gMaxVectorSizeIndex; -extern uint32_t gMinVectorSizeIndex; -extern uint32_t gDeviceFrequency; -extern cl_device_fp_config gFloatCapabilities; -extern cl_device_fp_config gDoubleCapabilities; - -#define LOWER_IS_BETTER 0 -#define HIGHER_IS_BETTER 1 - -#include "harness/errorHelpers.h" - -#if defined (_MSC_VER ) - //Deal with missing scalbn on windows - #define scalbnf( _a, _i ) ldexpf( _a, _i ) - #define scalbn( _a, _i ) ldexp( _a, _i ) - #define scalbnl( _a, _i ) ldexpl( _a, _i ) -#endif - -float Abs_Error( float test, double reference ); -float Ulp_Error( float test, double reference ); -float Bruteforce_Ulp_Error_Double( double test, long double reference ); - -uint64_t GetTime( void ); -double SubtractTime( uint64_t endTime, uint64_t startTime ); -int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, - cl_program *p, bool relaxedMode); -int MakeKernels(const char **c, cl_uint count, const char *name, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); - -// used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32( uint32_t bits ); -static inline double DoubleFromUInt32( uint32_t bits ) -{ - union{ uint64_t u; double d;} u; - - // split 0x89abcdef to 0x89abc00000000def - u.u = bits & 0xfffU; - u.u |= (uint64_t) (bits & ~0xfffU) << 32; - - // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s - u.u -= (bits & 0x800U) << 1; - - // return result - return u.d; -} - -void _LogBuildError( cl_program p, int line, const char *file ); -#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ ) - -#define PERF_LOOP_COUNT 100 - -//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero. 
-// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero. -static inline int IsFloatResultSubnormal( double x, float ulps ) -{ - x = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps; - return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 ); -} - -static inline int IsFloatResultSubnormalAbsError( double x , float abs_err) -{ - x = x - abs_err; - return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 ); -} - -static inline int IsDoubleResultSubnormal( long double x, float ulps ) -{ - x = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps; - return x < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 ); -} - -static inline int IsFloatInfinity(double x) -{ - union { cl_float d; cl_uint u; } u; - u.d = (cl_float) x; - return ((u.u & 0x7fffffffU) == 0x7F800000U); -} - -static inline int IsFloatMaxFloat(double x) -{ - union { cl_float d; cl_uint u; } u; - u.d = (cl_float) x; - return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); -} - -static inline int IsFloatNaN(double x) -{ - union { cl_float d; cl_uint u; } u; - u.d = (cl_float) x; - return ((u.u & 0x7fffffffU) > 0x7F800000U); -} - -extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x ); - -// Windows (since long double got deprecated) sets the x87 to 53-bit precision -// (that's x87 default state). This causes problems with the tests that -// convert long and ulong to float and double or otherwise deal with values -// that need more precision than 53-bit. So, set the x87 to 64-bit precision. -static inline void Force64BitFPUPrecision(void) -{ -#if __MINGW32__ - // The usual method is to use _controlfp as follows: - // #include - // _controlfp(_PC_64, _MCW_PC); - // - // _controlfp is available on MinGW32 but not on MinGW64. Instead of having - // divergent code just use inline assembly which works for both. 
- unsigned short int orig_cw = 0; - unsigned short int new_cw = 0; - __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw)); - new_cw = orig_cw | 0x0300; // set precision to 64-bit - __asm__ __volatile__ ("fldcw %0"::"m" (new_cw)); -#elif defined( _WIN32 ) && defined( __INTEL_COMPILER ) - // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64: - // > On the x64 architecture, changing the floating point precision is not supported. - // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx) - int cw; - __asm { fnstcw cw }; // Get current value of FPU control word. - cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision. - __asm { fldcw cw }; // Set new value of FPU control word. -#else - /* Implement for other platforms if needed */ -#endif -} - -extern -void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); - -typedef union -{ - int32_t i; - float f; -}int32f_t; - -typedef union -{ - int64_t l; - double d; -}int64d_t; - -void MulD(double *rhi, double *rlo, double u, double v); -void AddD(double *rhi, double *rlo, double a, double b); -void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl); -void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl); -void DivideDD(double *chi, double *clo, double a, double b); -int compareFloats(float x, float y); -int compareDoubles(double x, double y); - -void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed); - -float getAllowedUlpError(const Func *f, const bool relaxed); - -#endif /* UTILITY_H */ - - diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp deleted file mode 100644 index eb5007c09d..0000000000 --- a/test_conformance/math_brute_force/binary.cpp +++ /dev/null @@ -1,1583 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata, - bool relaxedMode); -int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata, - bool relaxedMode); -int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter, - bool relaxedMode); -int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter, - bool relaxedMode); - -const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); - -extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float, - TestFunc_Double_Double_Double }; - -extern const vtbl _binary_nextafter = { - "binary_nextafter", TestFunc_Float_Float_Float_nextafter, - TestFunc_Double_Double_Double_nextafter -}; - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, 
__global float", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " f0 = ", name, "( f0, f1 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0, f1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, f1 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* 
in1, __global double", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " d0 = ", name, "( d0, d1 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0, d1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", name, "( d0, d1 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -// A table of more difficult cases to get right -static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, 
-0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), 
MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f -}; - -static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - float maxError; // max error value. Init to 0. - double maxErrorValue; // position of the max error value (param 1). Init to 0. - double maxErrorValue2; // position of the max error value (param 2). Init to 0. - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. 
- cl_uint scale; // stride between individual test values - float ulps; // max_allowed ulps - int ftz; // non-zero if running in flush to zero mode - - int isFDim; - int skipNanInf; - int isNextafter; - bool relaxedMode; // True if test is running in relaxed mode, false - // otherwise. -} TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - int skipTestingRelaxed = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - - if (gWimpyMode){ - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode ); - test_info.skipNanInf = test_info.isFDim && ! 
gInfNanSupport; - test_info.isNextafter = isNextafter; - test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf2 ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: 
Unable to create sub-buffer of gOutBuffer[%d] for region {%zd, %zd}\n", (int) j, region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - // Run the kernels - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - - if( gMeasureTimes ) - { - //Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - { - p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; - p2[j] = 0x3fc00000; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - 
size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata( test_info.tinfo[i].d ); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - fptr func = job->f->func; - int ftz = job->ftz; - bool relaxedMode = job->relaxedMode; - float ulps = getAllowedUlpError(job->f, relaxedMode); - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - cl_uchar *overflow = (cl_uchar*)malloc(buffer_size); - const char *name = job->f->name; - int isFDim = job->isFDim; - int skipNanInf = job->skipNanInf; - int isNextafter = job->isNextafter; - cl_uint *t = 0; - float *r=0,*s=0,*s2=0; - cl_int copysign_test = 0; - RoundingMode oldRoundMode; - int skipVerification = 0; - - if (relaxedMode) - { - if (strcmp(name,"pow")==0 && gFastRelaxedDerived) - { - func = job->f->rfunc; - ulps = INFINITY; - skipVerification = 1; - }else - { - func = job->f->rfunc; - } - } - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_uint *out[ VECTOR_SIZE_COUNT ]; - for( j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; - cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; - j = 0; - - int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - if (job_id <= (cl_uint)indx) - { // test edge cases - float *fp = (float *)p; - float *fp2 = (float *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesFloat[x]; - fp2[j] = specialValuesFloat[y]; - if( ++x >= specialValuesFloatCount ) - { - x = 0; - y++; - if( y >= specialValuesFloatCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! 
err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - { - if( (error = clFinish(tinfo->tQueue)) ) - { - vlog_error( "Error: clFinish failed! 
err: %d\n", error ); - goto exit; - } - free(overflow); - return CL_SUCCESS; - } - - FPU_mode_type oldMode; - oldRoundMode = kRoundToNearestEven; - if( isFDim ) - { - //Calculate the correctly rounded reference result - memset( &oldMode, 0, sizeof( oldMode ) ); - if( ftz ) - ForceFTZ( &oldMode ); - - // Set the rounding mode to match the device - if (gIsInRTZMode) - oldRoundMode = set_round(kRoundTowardZero, kfloat); - } - - if(!strcmp(name, "copysign")) - copysign_test = 1; - -#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 )) - - //Calculate the correctly rounded reference result - r = (float *)gOut_Ref + thread_id * buffer_elements; - s = (float *)gIn + thread_id * buffer_elements; - s2 = (float *)gIn2 + thread_id * buffer_elements; - if( skipNanInf ) - { - for( j = 0; j < buffer_elements; j++ ) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float) ref_func( s[j], s2[j] ); - overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } - } - else - { - for( j = 0; j < buffer_elements; j++ ) - r[j] = (float) ref_func( s[j], s2[j] ); - } - - if( isFDim && ftz ) - RestoreFPState( &oldMode ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - goto exit; - } - - if (!skipVerification) { - //Verify data - t = (cl_uint *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_uint *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = ref_func( s[j], s2[j] ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables - // -cl-finite-math-only optimization. This optimization allows to assume that arguments and - // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if (relaxedMode || skipNanInf) - { - if( skipNanInf && overflow[j]) - continue; - // Note: no double rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || - IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) - continue; - } - - float err = Ulp_Error( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // nextafter on FTZ platforms may return the smallest - // normal float (2^-126) given a denormal or a zero - // as the first argument. The rationale here is that - // nextafter flushes the argument to zero and then - // returns the next representable number in the - // direction of the second argument, and since - // denorms are considered as zero, the smallest - // normal number is the next representable number. - // In which case, it should have the same sign as the - // second argument. 
- if (isNextafter ) - { - if(IsFloatSubnormal(s[j]) || s[j] == 0.0f) - { - float value = copysignf(twoToMinus126, s2[j]); - fail = fail && (test != value); - if (!fail) - err = 0.0f; - } - } - else - { - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - double correct2, correct3; - float err2, err3; - - if( skipNanInf ) - feclearexcept(FE_OVERFLOW); - - correct2 = ref_func( 0.0, s2[j] ); - correct3 = ref_func( -0.0, s2[j] ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables - // -cl-finite-math-only optimization. This optimization allows to assume that arguments and - // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if (relaxedMode || skipNanInf) - { - if( fetestexcept(FE_OVERFLOW) && skipNanInf ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsFloatSubnormal( s2[j] ) ) - { - double correct4, correct5; - float err4, err5; - - if( skipNanInf ) - feclearexcept(FE_OVERFLOW); - - correct2 = ref_func( 0.0, 0.0 ); - correct3 = ref_func( -0.0, 0.0 ); - correct4 = ref_func( 0.0, -0.0 ); - correct5 = ref_func( -0.0, -0.0 ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables - // -cl-finite-math-only optimization. This optimization allows to assume that arguments and - // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if (relaxedMode || skipNanInf) - { - if( fetestexcept(FE_OVERFLOW) && skipNanInf ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) || - IsFloatInfinity(correct4) || IsFloatNaN(correct4) || - IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && - (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) || - IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if(IsFloatSubnormal(s2[j]) ) - { - double correct2, correct3; - float err2, err3; - - if( skipNanInf ) - feclearexcept(FE_OVERFLOW); - - correct2 = ref_func( s[j], 0.0 ); - correct3 = ref_func( s[j], -0.0 ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables - // -cl-finite-math-only optimization. This optimization allows to assume that arguments and - // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if (relaxedMode || skipNanInf) - { - // Note: no double rounding here. Reference functions calculate in single precision. - if( overflow[j] && skipNanInf) - continue; - - if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - } - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a (0x%x)}: *%a vs. 
%a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], ((cl_uint*)s)[j], s2[j], ((cl_uint*)s2)[j], r[j], test, ((cl_uint*)&test)[0], j ); - error = -1; - goto exit; - } - } - } - } - } - - if (isFDim && gIsInRTZMode) - (void)set_round(oldRoundMode, kfloat); - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - - -exit: - if( overflow ) - free( overflow ); - return error; - -} - - -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, 
-0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - 
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), 
MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, -}; - -static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, - int isNextafter, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - - - if (gWimpyMode){ - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode ); - test_info.skipNanInf = 0; - test_info.isNextafter = isNextafter; - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - 
error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); - goto exit; - } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input arrays - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( 
error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - // Release - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata( test_info.tinfo[i].d ); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; - dptr func = job->f->dfunc; - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - const char *name = job->f->name; - - int isNextafter = job->isNextafter; - cl_ulong *t; - cl_double *r,*s,*s2; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_ulong *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; - cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - if( job_id <= (cl_uint)indx ) - { // test edge cases - cl_double *fp = (cl_double *)p; - cl_double *fp2 = (cl_double *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesDoubleCount; - y = (job_id * buffer_elements) / specialValuesDoubleCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesDouble[x]; - fp2[j] = specialValuesDouble[y]; - if( ++x >= specialValuesDoubleCount ) - { - x = 0; - y++; - if( y >= specialValuesDoubleCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int64(d); - p2[j] = genrand_int64(d); - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (cl_double *)gOut_Ref + thread_id * buffer_elements; - s = (cl_double *)gIn + thread_id * buffer_elements; - s2 = (cl_double *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (cl_double) func.f_ff( s[j], s2[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
- for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_ulong *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_ulong *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - cl_double test = ((cl_double*) q)[j]; - long double correct = func.f_ff( s[j], s2[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // nextafter on FTZ platforms may return the smallest - // normal float (2^-126) given a denormal or a zero - // as the first argument. The rationale here is that - // nextafter flushes the argument to zero and then - // returns the next representable number in the - // direction of the second argument, and since - // denorms are considered as zero, the smallest - // normal number is the next representable number. - // In which case, it should have the same sign as the - // second argument. 
- if (isNextafter ) - { - if(IsDoubleSubnormal(s[j]) || s[j] == 0.0f) - { - cl_double value = copysign(twoToMinus1022, s2[j]); - fail = fail && (test != value); - if (!fail) - err = 0.0f; - } - } - else - { - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - long double correct2 = func.f_ff( 0.0, s2[j] ); - long double correct3 = func.f_ff( -0.0, s2[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsDoubleSubnormal( s2[j] ) ) - { - correct2 = func.f_ff( 0.0, 0.0 ); - correct3 = func.f_ff( -0.0, 0.0 ); - long double correct4 = func.f_ff( 0.0, -0.0 ); - long double correct5 = func.f_ff( -0.0, -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && - (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) || - IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if(IsDoubleSubnormal(s2[j]) ) - { - long double correct2 = func.f_ff( s[j], 0.0 ); - long double correct3 = func.f_ff( s[j], -0.0 ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - } - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %.13la}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); - error = -1; - goto exit; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } -exit: - return error; - -} - -int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - return TestFunc_Float_Float_Float_common(f, d, 0, relaxedMode); -} - -int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - return TestFunc_Double_Double_Double_common(f, d, 0, relaxedMode); -} - -int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d, - bool relaxedMode) -{ - return TestFunc_Float_Float_Float_common(f, d, 1, relaxedMode); -} - -int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d, - bool relaxedMode) -{ - return TestFunc_Double_Double_Double_common(f, d, 1, relaxedMode); -} - diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp deleted file mode 100644 index 0742964d3b..0000000000 --- a/test_conformance/math_brute_force/binaryOperator.cpp +++ /dev/null @@ -1,1493 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata, - bool relaxedMode); -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata, - bool relaxedMode); - -extern const vtbl _binary_operator = { "binaryOperator", - TestFunc_Float_Float_Float_Operator, - TestFunc_Double_Double_Double_Operator }; - -static int BuildKernel(const char *name, const char *operator_symbol, - int vectorSize, cl_uint kernel_count, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, const char *operator_symbol, - int vectorSize, cl_uint kernel_count, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, const char *operator_symbol, - int vectorSize, cl_uint kernel_count, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = in1[i] ", operator_symbol, " in2[i];\n" - "}\n" - }; - const char *c3[] = { "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " f0 = f0 ", operator_symbol, " f1;\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " float3 f0, f1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = f0 ", operator_symbol, " f1;\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -static int BuildKernelDouble(const char *name, const char *operator_symbol, - int vectorSize, cl_uint kernel_count, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = in1[i] ", operator_symbol, " in2[i];\n" - "}\n" - }; - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" - "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " d0 = d0 ", operator_symbol, " d1;\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - 
" else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0, d1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = d0 ", operator_symbol, " d1;\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *name; - const char *operator_symbol; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->name, info->operator_symbol, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->name, info->operator_symbol, i, - info->kernel_count, info->kernels[i], - info->programs + i, info->relaxedMode); -} - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - float maxError; // max error value. Init to 0. - double maxErrorValue; // position of the max error value (param 1). Init to 0. - double maxErrorValue2; // position of the max error value (param 2). Init to 0. - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. 
- cl_uint scale; // stride between individual test values - float ulps; // max_allowed ulps - int ftz; // non-zero if running in flush to zero mode - bool relaxedMode; // True if the test is being run in relaxed mode, false - // otherwise. - - // no special fields -}TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -// A table of more difficult cases to get right -static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, 
-0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), 
MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f -}; - -static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - - test_info.step = test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || 
NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { gMinVectorSizeIndex, - test_info.threadCount, - test_info.k, - test_info.programs, - f->name, - f->nameInCode, - relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - - if( gMeasureTimes ) - { - //Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - { - p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; - p2[j] = 0x3fc00000; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - fptr func = job->f->func; - bool relaxedMode = job->relaxedMode; - float ulps = getAllowedUlpError(job->f, relaxedMode); - if (relaxedMode) - { - func = job->f->rfunc; - } - - - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - cl_uchar *overflow = (cl_uchar*)malloc(buffer_size); - const char *name = job->f->name; - cl_uint *t; - cl_float *r,*s,*s2; - RoundingMode oldRoundMode; - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_uint *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; - cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; - j = 0; - - int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - - if( job_id <= (cl_uint)indx ) { - // Insert special values - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; - - for( ; j < buffer_elements; j++ ) { - p[j] = ((cl_uint *)specialValuesFloat)[x]; - p2[j] = ((cl_uint *)specialValuesFloat)[y]; - ++x; - if (x >= specialValuesFloatCount) { - x = 0; - y++; - if (y >= specialValuesFloatCount) - break; - } - if (relaxedMode && strcmp(name, "divide") == 0) - { - cl_uint pj = p[j] & 0x7fffffff; - cl_uint p2j = p2[j] & 0x7fffffff; - // Replace values outside [2^-62, 2^62] with QNaN - if (pj < 0x20800000 || pj > 0x5e800000) - p[j] = 0x7fc00000; - if (p2j < 0x20800000 || p2j > 0x5e800000) - p2[j] = 0x7fc00000; - } - } - } - - // Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - - if (relaxedMode && strcmp(name, "divide") == 0) - { - cl_uint pj = p[j] & 0x7fffffff; - cl_uint p2j = p2[j] & 0x7fffffff; - // Replace values outside [2^-62, 2^62] with QNaN - if (pj < 0x20800000 || pj > 0x5e800000) - p[j] = 0x7fc00000; - if (p2j < 0x20800000 || p2j > 0x5e800000) - p2[j] = 0x7fc00000; - } - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! 
err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - { - free( overflow ); - return CL_SUCCESS; - } - - //Calculate the correctly 
rounded reference result - FPU_mode_type oldMode; - memset( &oldMode, 0, sizeof( oldMode ) ); - if( ftz ) - ForceFTZ( &oldMode ); - - // Set the rounding mode to match the device - oldRoundMode = kRoundToNearestEven; - if (gIsInRTZMode) - oldRoundMode = set_round(kRoundTowardZero, kfloat); - - //Calculate the correctly rounded reference result - r = (float *)gOut_Ref + thread_id * buffer_elements; - s = (float *)gIn + thread_id * buffer_elements; - s2 = (float *)gIn2 + thread_id * buffer_elements; - if( gInfNanSupport ) - { - for( j = 0; j < buffer_elements; j++ ) - r[j] = (float) func.f_ff( s[j], s2[j] ); - } - else - { - for( j = 0; j < buffer_elements; j++ ) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float) func.f_ff( s[j], s2[j] ); - overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } - } - - if (gIsInRTZMode) - (void)set_round(oldRoundMode, kfloat); - - if( ftz ) - RestoreFPState( &oldMode ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_uint *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_uint *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = func.f_ff( s[j], s2[j] ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if ( !gInfNanSupport) - { - // Note: no double rounding here. Reference functions calculate in single precision. - if( overflow[j] || - IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || - IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) - continue; - } - - // Per section 10 paragraph 6, accept embedded devices always returning positive 0.0. - if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue; - - float err = Ulp_Error( test, correct ); - float errB = Ulp_Error( test, (float) correct ); - - int fail = ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); - if( fabsf( errB ) < fabsf(err ) ) - err = errB; - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - double correct2, correct3; - float err2, err3; - - if( !gInfNanSupport ) - feclearexcept(FE_OVERFLOW); - - correct2 = func.f_ff( 0.0, s2[j] ); - correct3 = func.f_ff( -0.0, s2[j] ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( !gInfNanSupport ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsFloatSubnormal( s2[j] ) ) - { - double correct4, correct5; - float err4, err5; - - if( !gInfNanSupport ) - feclearexcept(FE_OVERFLOW); - - correct2 = func.f_ff( 0.0, 0.0 ); - correct3 = func.f_ff( -0.0, 0.0 ); - correct4 = func.f_ff( 0.0, -0.0 ); - correct5 = func.f_ff( -0.0, -0.0 ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( !gInfNanSupport ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) || - IsFloatInfinity(correct4) || IsFloatNaN(correct4) || - IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && - (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) || - IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if(IsFloatSubnormal(s2[j]) ) - { - double correct2, correct3; - float err2, err3; - - if( !gInfNanSupport ) - feclearexcept(FE_OVERFLOW); - - correct2 = func.f_ff( s[j], 0.0 ); - correct3 = func.f_ff( s[j], -0.0 ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if ( !gInfNanSupport) - { - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( overflow[j] || - IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(correct2)|| IsFloatNaN(correct2) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j ); - error = -1; - goto exit; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } -exit: - if( overflow ) - free( overflow ); - return error; -} - - -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), 
MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, 
+0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, -}; - -static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) 
- { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, 
®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - - // Init the kernels - { - BuildKernelInfo build_info = { gMinVectorSizeIndex, - test_info.threadCount, - test_info.k, - test_info.programs, - f->name, - f->nameInCode, - relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - - if( gMeasureTimes ) - { - //Init input arrays - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - } - - if( (error = 
clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; 
j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - // Release - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; - dptr func = job->f->dfunc; - int ftz = job->ftz; - bool relaxedMode = job->relaxedMode; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - const char *name = job->f->name; - cl_ulong *t; - cl_double *r,*s,*s2; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_ulong *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; - cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - if( job_id <= (cl_uint)indx ) - { // test edge cases - cl_double *fp = (cl_double *)p; - cl_double *fp2 = (cl_double *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesDoubleCount; - y = (job_id * buffer_elements) / specialValuesDoubleCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesDouble[x]; - fp2[j] = specialValuesDouble[y]; - if( ++x >= specialValuesDoubleCount ) - { - x = 0; - y++; - if( y >= specialValuesDoubleCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int64(d); - p2[j] = genrand_int64(d); - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (cl_double *)gOut_Ref + thread_id * buffer_elements; - s = (cl_double *)gIn + thread_id * buffer_elements; - s2 = (cl_double *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (cl_double) func.f_ff( s[j], s2[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
- for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_ulong *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_ulong *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - cl_double test = ((cl_double*) q)[j]; - long double correct = func.f_ff( s[j], s2[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - long double correct2 = func.f_ff( 0.0, s2[j] ); - long double correct3 = func.f_ff( -0.0, s2[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsDoubleSubnormal( s2[j] ) ) - { - correct2 = func.f_ff( 0.0, 0.0 ); - correct3 = func.f_ff( -0.0, 0.0 ); - long double correct4 = func.f_ff( 0.0, -0.0 ); - long double correct5 = func.f_ff( -0.0, -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && - (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) || - IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if(IsDoubleSubnormal(s2[j]) ) - { - long double correct2 = func.f_ff( s[j], 0.0 ); - long double correct3 = func.f_ff( s[j], -0.0 ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); - error = -1; - goto exit; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - -exit: - return error; - -} - - - - diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp new file mode 100644 index 0000000000..a2b7d28bac --- /dev/null +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -0,0 +1,809 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include <cstring> + +namespace { + +const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in1, __global double", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " d0 = ", + name, + "( d0, d1 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " double3 d0;\n" + " double3 d1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", + name, + "( d0, d1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double + maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init + // to 0. 
+ MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + int isFDim; + int skipNanInf; + int isNextafter; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +}; + +// A table of more difficult cases to get right +const double specialValues[] = { + -NAN, + -INFINITY, + -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), + -1000.0, + -100.0, + -4.0, + -3.5, + -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), + -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), + -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), + -1.5, + 
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), + -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), + -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, + + +NAN, + +INFINITY, + +DBL_MAX, + MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), 
+ MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), + +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), + +2.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + +1.0, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, + MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + 
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), + +0.0, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_double); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + float ulps = job->ulps; + dptr func = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_int error; + const char *name = job->f->name; + + int isNextafter = job->isNextafter; + cl_ulong *t; + cl_double *r; + cl_double *s; + cl_double *s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_ulong *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + cl_double *fp = (cl_double *)p; + cl_double *fp2 = (cl_double *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + fp[idx] = specialValues[x]; + fp2[idx] = specialValues[y]; + if (++x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int64(d); + p2[idx] = genrand_int64(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! 
err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_ff(s[j], s2[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? 
CL_FALSE : CL_TRUE; + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_ulong *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + cl_double test = ((cl_double *)q)[j]; + long double correct = func.f_ff(s[j], s2[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); + int fail = !(fabsf(err) <= ulps); + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // nextafter on FTZ platforms may return the smallest + // normal double (2^-1022) given a denormal or a zero + // as the first argument. The rationale here is that + // nextafter flushes the argument to zero and then + // returns the next representable number in the + // direction of the second argument, and since + // denorms are considered as zero, the smallest + // normal number is the next representable number. + // In which case, it should have the same sign as the + // second argument. 
+ if (isNextafter) + { + if (IsDoubleSubnormal(s[j]) || s[j] == 0.0f) + { + cl_double value = copysign(twoToMinus1022, s2[j]); + fail = fail && (test != value); + if (!fail) err = 0.0f; + } + } + else + { + // retry per section 6.5.3.3 + if (IsDoubleSubnormal(s[j])) + { + long double correct2 = func.f_ff(0.0, s2[j]); + long double correct3 = func.f_ff(-0.0, s2[j]); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with both args as zero + if (IsDoubleSubnormal(s2[j])) + { + correct2 = func.f_ff(0.0, 0.0); + correct3 = func.f_ff(-0.0, 0.0); + long double correct4 = func.f_ff(0.0, -0.0); + long double correct5 = func.f_ff(-0.0, -0.0); + err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + float err4 = + Bruteforce_Ulp_Error_Double(test, correct4); + float err5 = + Bruteforce_Ulp_Error_Double(test, correct5); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps)) + && (!(fabsf(err4) <= ulps)) + && (!(fabsf(err5) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps) + || IsDoubleResultSubnormal(correct4, ulps) + || IsDoubleResultSubnormal(correct5, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (IsDoubleSubnormal(s2[j])) + { + long double correct2 = 
func.f_ff(s[j], 0.0); + long double correct3 = func.f_ff(s[j], -0.0); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + } + + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if (fail) + { + vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, " + "%.13la}: *%.13la vs. %.13la\n", + name, sizeNames[k], err, s[j], s2[j], r[j], + test); + error = -1; + goto exit; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = 0; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + 
test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp new file mode 100644 index 0000000000..97712ee8b9 --- /dev/null +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -0,0 +1,968 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include <cstring> + +namespace { + +const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in1, __global float", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " f0 = ", + name, + "( f0, f1 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " float3 f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double + maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init + // to 0. 
+ MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + int isFDim; + int skipNanInf; + int isNextafter; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +}; + +// A table of more difficult cases to get right +const float specialValues[] = { + -NAN, + -INFINITY, + -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), + MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), + MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), + MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), + MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), + MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), + MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), + MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), + MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), + -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), + -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), + -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), + MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), + -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, 
-25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), + -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), + MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), + -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), + -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), + MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), + MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), + MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), + -0.0f, + + +NAN, + +INFINITY, + +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), + MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), + MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), + MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), + MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), + MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), + MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), + MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), + MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), + 2.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), + +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), + 1.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), + +1.0f, + MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), + +0.5f, + MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + 
MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), + +0.25f, + MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), + +FLT_MIN, + MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), + MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), + MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), + MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), + +0.0f, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_float); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + fptr func = job->f->func; + int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; + float ulps = getAllowedUlpError(job->f, relaxedMode); + MTdata d = tinfo->d; + cl_int error; + std::vector<bool> overflow(buffer_elements, false); + const char *name = job->f->name; + int isFDim = job->isFDim; + int skipNanInf = job->skipNanInf; + int isNextafter = job->isNextafter; + cl_uint *t = 0; + cl_float *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; + cl_int copysign_test = 0; + RoundingMode oldRoundMode; + int skipVerification = 0; + + if (relaxedMode) + { + func = job->f->rfunc; + if (strcmp(name, "pow") == 0 && gFastRelaxedDerived) + { + ulps = INFINITY; + skipVerification = 1; + } + } + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_uint *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) + { + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + float *fp = (float *)p; + float *fp2 = (float *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + fp[idx] = specialValues[x]; + fp2[idx] = specialValues[y]; + ++x; + if (x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int32(d); + p2[idx] = genrand_int32(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! 
err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) + { + if ((error = clFinish(tinfo->tQueue))) + { + vlog_error("Error: clFinish failed! 
err: %d\n", error); + goto exit; + } + return CL_SUCCESS; + } + + FPU_mode_type oldMode; + oldRoundMode = kRoundToNearestEven; + if (isFDim) + { + // Calculate the correctly rounded reference result + memset(&oldMode, 0, sizeof(oldMode)); + if (ftz) ForceFTZ(&oldMode); + + // Set the rounding mode to match the device + if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); + } + + if (!strcmp(name, "copysign")) copysign_test = 1; + +#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2)) + + // Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + if (skipNanInf) + { + for (size_t j = 0; j < buffer_elements; j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float)ref_func(s[j], s2[j]); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)ref_func(s[j], s2[j]); + } + + if (isFDim && ftz) RestoreFPState(&oldMode); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + goto exit; + } + } + + if (!skipVerification) + { + // Verify data + t = (cl_uint *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float test = ((float *)q)[j]; + double correct = ref_func(s[j], s2[j]); + + // Per section 10 paragraph 6, accept any result if an input + // or output is a infinity or NaN or overflow As per + // OpenCL 2.0 spec, section 5.8.4.3, enabling + // fast-relaxed-math mode also enables -cl-finite-math-only + // optimization. This optimization allows to assume that + // arguments and results are not NaNs or +/-INFs. Hence, + // accept any result if inputs or results are NaNs or INFs. + if (relaxedMode || skipNanInf) + { + if (skipNanInf && overflow[j]) continue; + // Note: no double rounding here. Reference functions + // calculate in single precision. + if (IsFloatInfinity(correct) || IsFloatNaN(correct) + || IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) + || IsFloatInfinity(s[j]) || IsFloatNaN(s[j])) + continue; + } + + float err = Ulp_Error(test, correct); + int fail = !(fabsf(err) <= ulps); + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsFloatResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // nextafter on FTZ platforms may return the smallest + // normal float (2^-126) given a denormal or a zero + // as the first argument. The rationale here is that + // nextafter flushes the argument to zero and then + // returns the next representable number in the + // direction of the second argument, and since + // denorms are considered as zero, the smallest + // normal number is the next representable number. + // In which case, it should have the same sign as the + // second argument. 
+ if (isNextafter) + { + if (IsFloatSubnormal(s[j]) || s[j] == 0.0f) + { + float value = copysignf(twoToMinus126, s2[j]); + fail = fail && (test != value); + if (!fail) err = 0.0f; + } + } + else + { + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + double correct2, correct3; + float err2, err3; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = ref_func(0.0, s2[j]); + correct3 = ref_func(-0.0, s2[j]); + + // Per section 10 paragraph 6, accept any result + // if an input or output is a infinity or NaN or + // overflow As per OpenCL 2.0 spec, + // section 5.8.4.3, enabling fast-relaxed-math + // mode also enables -cl-finite-math-only + // optimization. This optimization allows to + // assume that arguments and results are not + // NaNs or +/-INFs. Hence, accept any result if + // inputs or results are NaNs or INFs. + if (relaxedMode || skipNanInf) + { + if (fetestexcept(FE_OVERFLOW) && skipNanInf) + continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with both args as zero + if (IsFloatSubnormal(s2[j])) + { + double correct4, correct5; + float err4, err5; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = ref_func(0.0, 0.0); + correct3 = ref_func(-0.0, 0.0); + correct4 = ref_func(0.0, -0.0); + correct5 = ref_func(-0.0, -0.0); + + // Per section 10 paragraph 6, accept any + // result if an input or output is a + // infinity or NaN or overflow As per + // OpenCL 2.0 spec, section 5.8.4.3, + // enabling fast-relaxed-math mode also + // enables -cl-finite-math-only + // optimization. This optimization allows to + // assume that arguments and results are not + // NaNs or +/-INFs. Hence, accept any result + // if inputs or results are NaNs or INFs. + if (relaxedMode || skipNanInf) + { + if (fetestexcept(FE_OVERFLOW) + && skipNanInf) + continue; + + // Note: no double rounding here. + // Reference functions calculate in + // single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3) + || IsFloatInfinity(correct4) + || IsFloatNaN(correct4) + || IsFloatInfinity(correct5) + || IsFloatNaN(correct5)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + err4 = Ulp_Error(test, correct4); + err5 = Ulp_Error(test, correct5); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps)) + && (!(fabsf(err4) <= ulps)) + && (!(fabsf(err5) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, + ulps) + || IsFloatResultSubnormal(correct4, + ulps) + || IsFloatResultSubnormal(correct5, + ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (IsFloatSubnormal(s2[j])) + { + double correct2, correct3; + float err2, err3; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = ref_func(s[j], 0.0); + correct3 = ref_func(s[j], -0.0); + + // Per section 10 paragraph 6, accept any result + // if an input or output is a infinity or NaN or + // overflow As per OpenCL 2.0 spec, + // section 5.8.4.3, enabling fast-relaxed-math + // mode also enables -cl-finite-math-only + // optimization. This optimization allows to + // assume that arguments and results are not + // NaNs or +/-INFs. Hence, accept any result if + // inputs or results are NaNs or INFs. + if (relaxedMode || skipNanInf) + { + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (overflow[j] && skipNanInf) continue; + + if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + } + + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if (fail) + { + vlog_error( + "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a " + "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", + name, sizeNames[k], err, s[j], ((cl_uint *)s)[j], + s2[j], ((cl_uint *)s2)[j], r[j], test, + ((cl_uint *)&test)[0], j); + error = -1; + goto exit; + } + } + } + } + } + + if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == 
test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp deleted file mode 100644 index 6ba0eb58e5..0000000000 --- a/test_conformance/math_brute_force/binary_i.cpp +++ /dev/null @@ -1,1256 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include -#include "FunctionList.h" - -int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double_Double_Int(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int, - TestFunc_Double_Double_Int }; - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global int* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = vload3( 0, in2 + 3 * i );\n" - " f0 = ", name, "( f0, i0 );\n" - " vstore3( f0, 0, 
out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0;\n" - " int3 i0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, i0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = vload3( 0, 
in2 + 3 * i );\n" - " d0 = ", name, "( d0, i0 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0;\n" - " int3 i0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " d0 = ", name, "( d0, i0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - - -// A table of more difficult cases to get right -static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), 
MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, 
+0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f -}; -static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); - - -static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, - -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147 }; -static size_t specialValuesIntCount = sizeof( specialValuesInt ) / sizeof( specialValuesInt[0] ); - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - float maxError; // max error value. Init to 0. - double maxErrorValue; // position of the max error value (param 1). Init to 0. - cl_int maxErrorValue2; // position of the max error value (param 2). Init to 0. 
- MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. - cl_uint scale; // stride between individual test values - float ulps; // max_allowed ulps - int ftz; // non-zero if running in flush to zero mode - - // no special values -}TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = 
gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) }; - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = 
clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - // Run the kernels - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - - if( gMeasureTimes ) - { - //Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - { - p[j] = (genrand_int32(d) & ~0x40000000) | 0x38000000; - p2[j] = 3; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); 
- sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; - fptr func = job->f->func; - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - const char *name = job->f->name; - cl_uint *t; - cl_float *r,*s; - cl_int *s2; - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_uint *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], 
CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; - cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - if( job_id <= (cl_uint)indx ) - { // test edge cases - float *fp = (float *)p; - cl_int *ip2 = (cl_int *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesFloat[x]; - ip2[j] = specialValuesInt[y]; - if( ++x >= specialValuesFloatCount ) - { - x = 0; - y++; - if( y >= specialValuesIntCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (float *)gOut_Ref + thread_id * buffer_elements; - s = (float *)gIn + thread_id * buffer_elements; - s2 = (cl_int *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (float) func.f_fi( s[j], s2[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
- for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_uint *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_uint *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = func.f_fi( s[j], s2[j] ); - float err = Ulp_Error( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - double correct2, correct3; - float err2, err3; - correct2 = func.f_fi( 0.0, s2[j] ); - correct3 = func.f_fi( -0.0, s2[j] ); - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( - "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: " - "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n", - name, sizeNames[k], err, s[j], ((uint32_t *)s)[j], - s2[j], r[j], ((uint32_t *)r)[j], test, - ((cl_uint *)&test)[0], j); - error = -1; - goto exit; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } - -exit: - return error; - -} - - - -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, 
-0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), 
MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, -}; -static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); - -static const int specialValuesInt2[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX, - -1, -2, -3, -1022, -1023, -11024, -INT_MAX }; -static size_t specialValuesInt2Count = sizeof( specialValuesInt ) / sizeof( specialValuesInt[0] ); - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - 
test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of 
gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) }; - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - // Run the kernels - if( !gSkipCorrectnessTesting ) - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if( error ) - goto exit; - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input arrays - double *p = (double *)gIn; - cl_int *p2 = (cl_int *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = 3; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE/2, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { 
LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 ); - vlog( "\n" ); - - -exit: - // Release - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; - dptr func = job->f->dfunc; - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - const char *name = job->f->name; - cl_ulong *t; - cl_double *r,*s; - cl_int *s2; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_ulong *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; - cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - if( job_id <= (cl_uint)indx ) - { // test edge cases - cl_double *fp = (cl_double *)p; - cl_int *ip2 = (cl_int *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesDoubleCount; - y = (job_id * buffer_elements) / specialValuesDoubleCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesDouble[x]; - ip2[j] = specialValuesInt2[y]; - if( ++x >= specialValuesDoubleCount ) - { - x = 0; - y++; - if( y >= specialValuesInt2Count ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = genrand_int32(d); - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size/2, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (cl_double *)gOut_Ref + thread_id * buffer_elements; - s = (cl_double *)gIn + thread_id * buffer_elements; - s2 = (cl_int *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (cl_double) func.f_fi( s[j], s2[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
- for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_ulong *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_ulong *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - cl_double test = ((cl_double*) q)[j]; - long double correct = func.f_fi( s[j], s2[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, ulps ) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - long double correct2 = func.f_fi( 0.0, s2[j] ); - long double correct3 = func.f_fi( -0.0, s2[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - tinfo->maxErrorValue2 = s2[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %d}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); - error = -1; - goto exit; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - -exit: - return error; - -} - - - diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp new file mode 100644 index 0000000000..f15c21ede2 --- /dev/null +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -0,0 +1,729 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+// NOTE(review): the two system header names were stripped from this copy of
+// the patch. <climits> (INT_MIN / INT_MAX below) and <cstring> are assumed;
+// confirm against the repository.
+#include <climits>
+#include <cstring>
+
+namespace {
+
+// Assemble the OpenCL C source of "math_kernel<size>", which applies `name`
+// to a double operand and an int operand, and pass it to MakeKernels to
+// create `kernel_count` kernels in `k` and the program in `p`.
+// `vectorSize` is an index into sizeNames / sizeValues. For 3-component
+// vectors a vload3/vstore3 variant is generated whose last work-item handles
+// the buffer tail element by element. Returns whatever MakeKernels returns.
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+                cl_kernel *k, cl_program *p, bool relaxedMode)
+{
+    // Generic variant: one vector element per work-item.
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global double",
+                        sizeNames[vectorSize],
+                        "* out, __global double",
+                        sizeNames[vectorSize],
+                        "* in1, __global int",
+                        sizeNames[vectorSize],
+                        "* in2 )\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in1[i], in2[i] );\n"
+                        "}\n" };
+
+    // 3-component variant: buffers are addressed as scalars.
+    const char *c3[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global double* out, __global double* in, __global int* in2)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " double3 d0 = vload3( 0, in + 3 * i );\n"
+        " int3 i0 = vload3( 0, in2 + 3 * i );\n"
+        " d0 = ",
+        name,
+        "( d0, i0 );\n"
+        " vstore3( d0, 0, out + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " double3 d0;\n"
+        " int3 i0;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+        " break;\n"
+        " case 0:\n"
+        " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+        " break;\n"
+        " }\n"
+        " d0 = ",
+        name,
+        "( d0, i0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = d0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = d0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
+                       relaxedMode);
+}
+
+// Arguments handed to BuildKernelFn through ThreadPool_Do.
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_uint kernel_count; // number of kernels to create per vector size
+    KernelMatrix &kernels; // destination, indexed [vector size][kernel]
+    cl_program *programs; // destination programs, indexed by vector size
+    const char *nameInCode; // function name as spelled in kernel source
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+// ThreadPool job: build the kernels for vector-size index
+// info->offset + job_id. `p` points to a BuildKernelInfo.
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernel_count,
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
+}
+
+// Thread specific data for a worker thread
+struct ThreadInfo
+{
+    cl_mem inBuf; // input buffer for the thread (double operand)
+    cl_mem inBuf2; // input buffer for the thread (int operand)
+    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+    float maxError; // max error value. Init to 0.
+    double
+        maxErrorValue; // position of the max error value (param 1). Init to 0.
+    cl_int maxErrorValue2; // position of the max error value (param 2). Init
+                           // to 0.
+    MTdata d; // per-thread random number generator state
+    cl_command_queue tQueue; // per thread command queue to improve performance
+};
+
+// State shared by all Test jobs of one function under test.
+struct TestInfo
+{
+    size_t subBufferSize; // Size of the sub-buffer in elements
+    const Func *f; // A pointer to the function info
+    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs
+    cl_uint step; // step between each chunk and the next.
+    cl_uint scale; // stride between individual test values
+    float ulps; // max_allowed ulps
+    int ftz; // non-zero if running in flush to zero mode
+
+    // no special values
+};
+
+// A table of more difficult cases to get right.
+// The second half mirrors the first half with the sign flipped.
+const double specialValues[] = {
+    -NAN,
+    -INFINITY,
+    -DBL_MAX,
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
+    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
+    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
+    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
+    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
+    -1000.0,
+    -100.0,
+    -4.0,
+    -3.5,
+    -3.0,
+    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
+    -2.5,
+    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
+    -2.0,
+    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
+    -1.5,
+    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
+    -1.0,
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
+    -0.5,
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
+    -0.25,
+    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
+    -DBL_MIN,
+    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
+    -0.0,
+
+    +NAN,
+    +INFINITY,
+    +DBL_MAX,
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
+    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
+    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
+    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
+    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
+    +1000.0,
+    +100.0,
+    +4.0,
+    +3.5,
+    +3.0,
+    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
+    +2.5,
+    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
+    +2.0,
+    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
+    +1.5,
+    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
+    // Was a copy of the negative entry; the positive mirror is intended here.
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52),
+    +1.0,
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
+    +0.5,
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
+    +0.25,
+    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
+    +DBL_MIN,
+    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
+    +0.0,
+};
+
+constexpr size_t specialValuesCount =
+    sizeof(specialValues) / sizeof(specialValues[0]);
+
+// Integer operands paired with every entry of specialValues.
+// The second row mirrors the first with negated values (-1024 was previously
+// mistyped as -11024).
+const int specialValuesInt[] = {
+    0,       1,  2,  3,  1022,  1023,  1024,  INT_MIN,
+    INT_MAX, -1, -2, -3, -1022, -1023, -1024, -INT_MAX,
+};
+
+constexpr size_t specialValuesIntCount =
+    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
+
+// ThreadPool job: test one chunk of inputs on the calling thread's queue.
+//
+// `data` points to the shared TestInfo; `thread_id` selects the ThreadInfo,
+// the kernels and the slices of gIn / gIn2 / gOut_Ref used by this call.
+// The job fills the input slices (special-value pairs for the first jobs,
+// random values otherwise), runs the kernel for every enabled vector size,
+// computes the host reference with f->dfunc.f_fi and compares the results
+// bit for bit, falling back to an ulp check with flush-to-zero retries.
+// Returns CL_SUCCESS, an OpenCL error code, or -1 on a precision failure.
+//
+// Function-scope variables are declared before the first `goto exit` so no
+// jump bypasses an initialization.
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_double);
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    float ulps = job->ulps;
+    dptr func = job->f->dfunc;
+    int ftz = job->ftz;
+    MTdata d = tinfo->d;
+    cl_int error;
+    const char *name = job->f->name;
+    cl_ulong *t;
+    cl_double *r;
+    cl_double *s;
+    cl_int *s2;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_ulong *out[VECTOR_SIZE_COUNT];
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (cl_ulong *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+            buffer_size, 0, NULL, e + j, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+
+    // Init input array
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    size_t idx = 0;
+    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_int *ip2 = (cl_int *)p2;
+        uint32_t x, y;
+
+        // Position of this job's first element in the cross product
+        // specialValues x specialValuesInt.
+        x = (job_id * buffer_elements) % specialValuesCount;
+        y = (job_id * buffer_elements) / specialValuesCount;
+
+        for (; idx < buffer_elements; idx++)
+        {
+            fp[idx] = specialValues[x];
+            ip2[idx] = specialValuesInt[y];
+            if (++x >= specialValuesCount)
+            {
+                x = 0;
+                y++;
+                if (y >= specialValuesIntCount)
+                {
+                    // Count the element just written before leaving;
+                    // otherwise the random fill below would overwrite the
+                    // last special-value pair.
+                    idx++;
+                    break;
+                }
+            }
+        }
+    }
+
+    // Init any remaining values.
+    for (; idx < buffer_elements; idx++)
+    {
+        p[idx] = DoubleFromUInt32(genrand_int32(d));
+        p2[idx] = genrand_int32(d);
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        goto exit;
+    }
+
+    // The second operand is cl_int, so its byte size is computed from the
+    // element count rather than from the double buffer size.
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
+                                      buffer_elements * sizeof(cl_int), p2, 0,
+                                      NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        goto exit;
+    }
+
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        // Wait for the map to finish
+        if ((error = clWaitForEvents(1, e + j)))
+        {
+            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+            goto exit;
+        }
+        if ((error = clReleaseEvent(e[j])))
+        {
+            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                       error);
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
+                                    &tinfo->inBuf2)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
+    s = (cl_double *)gIn + thread_id * buffer_elements;
+    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    for (size_t j = 0; j < buffer_elements; j++)
+        r[j] = (cl_double)func.f_fi(s[j], s2[j]);
+
+    // Read the data back -- no need to wait for the first N-1 buffers but wait
+    // for the last buffer. This is an in order queue.
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
+        out[j] = (cl_ulong *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            goto exit;
+        }
+    }
+
+    // Verify data
+    t = (cl_ulong *)r;
+    for (size_t j = 0; j < buffer_elements; j++)
+    {
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        {
+            cl_ulong *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if (t[j] != q[j])
+            {
+                cl_double test = ((cl_double *)q)[j];
+                long double correct = func.f_fi(s[j], s2[j]);
+                float err = Bruteforce_Ulp_Error_Double(test, correct);
+                int fail = !(fabsf(err) <= ulps);
+
+                if (fail && ftz)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsDoubleResultSubnormal(correct, ulps))
+                    {
+                        fail = fail && (test != 0.0f);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsDoubleSubnormal(s[j]))
+                    {
+                        long double correct2 = func.f_fi(0.0, s2[j]);
+                        long double correct3 = func.f_fi(-0.0, s2[j]);
+                        float err2 =
+                            Bruteforce_Ulp_Error_Double(test, correct2);
+                        float err3 =
+                            Bruteforce_Ulp_Error_Double(test, correct3);
+                        fail = fail
+                            && ((!(fabsf(err2) <= ulps))
+                                && (!(fabsf(err3) <= ulps)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+
+                        // retry per section 6.5.3.4
+                        if (IsDoubleResultSubnormal(correct2, ulps)
+                            || IsDoubleResultSubnormal(correct3, ulps))
+                        {
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+
+                // Track the worst error seen by this thread.
+                if (fabsf(err) > tinfo->maxError)
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, %d}: "
+                               "*%.13la vs. %.13la\n",
+                               name, sizeNames[k], err, s[j], s2[j], r[j],
+                               test);
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+
+    // Progress output, emitted only when the low 28 bits of base are zero.
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements, job->ulps,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+
+exit:
+    return error;
+}
+
+} // anonymous namespace
+
+// Test a function taking (double, int) and returning double.
+//
+// Creates per-thread sub-buffers of gInBuffer, gInBuffer2 and gOutBuffer[],
+// a command queue and a random generator (seeded from `d`) per thread,
+// builds the kernels for every enabled vector size and, unless
+// gSkipCorrectnessTesting is set, runs Test through the thread pool and
+// reports the largest error observed. Everything created here is released
+// before returning. Returns 0 on success or the first error encountered.
+int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfo test_info{};
+    cl_int error;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    cl_int maxErrorVal2 = 0;
+
+    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+    // Init test_info
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_double));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for
+    // every thread
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        test_info.k[i].resize(test_info.threadCount, nullptr);
+    }
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    {
+        // Region of the double-sized buffers owned by thread i.
+        cl_buffer_region region = {
+            i * test_info.subBufferSize * sizeof(cl_double),
+            test_info.subBufferSize * sizeof(cl_double)
+        };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+        // Region of the int-sized second input buffer owned by thread i.
+        cl_buffer_region region2 = { i * test_info.subBufferSize
+                                         * sizeof(cl_int),
+                                     test_info.subBufferSize * sizeof(cl_int) };
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            // Report the region that was actually requested (region2).
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zd, %zd}\n",
+                       region2.origin, region2.size);
+            goto exit;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of "
+                           "gOutBuffer[%d] for region {%zd, %zd}\n",
+                           (int)j, region.origin, region.size);
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    // (own scope so that `goto exit` never jumps past build_info)
+    {
+        BuildKernelInfo build_info = {
+            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
+            test_info.programs,  f->nameInCode,         relaxedMode
+        };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            goto exit;
+    }
+
+    // Run the kernels
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+        // Accumulate the arithmetic errors
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if (error) goto exit;
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+exit:
+    // Release
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        clReleaseProgram(test_info.programs[i]);
+        for (auto &kernel : test_info.k[i])
+        {
+            clReleaseKernel(kernel);
+        }
+    }
+
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
+    }
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
new file mode 100644
index 0000000000..9e27b00730
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -0,0 +1,723 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+// NOTE(review): the two system header names were stripped from this copy of
+// the patch. <climits> and <cstring> are assumed; confirm against the
+// repository.
+#include <climits>
+#include <cstring>
+
+namespace {
+
+// Assemble the OpenCL C source of "math_kernel<size>", which applies `name`
+// to a float operand and an int operand, and pass it to MakeKernels to
+// create `kernel_count` kernels in `k` and the program in `p`.
+// `vectorSize` is an index into sizeNames / sizeValues. For 3-component
+// vectors a vload3/vstore3 variant is generated whose last work-item handles
+// the buffer tail element by element. Returns whatever MakeKernels returns.
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+                cl_kernel *k, cl_program *p, bool relaxedMode)
+{
+    // Generic variant: one vector element per work-item.
+    const char *c[] = { "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global float",
+                        sizeNames[vectorSize],
+                        "* out, __global float",
+                        sizeNames[vectorSize],
+                        "* in1, __global int",
+                        sizeNames[vectorSize],
+                        "* in2 )\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in1[i], in2[i] );\n"
+                        "}\n" };
+
+    // 3-component variant: buffers are addressed as scalars.
+    const char *c3[] = {
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global float* out, __global float* in, __global int* in2)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " float3 f0 = vload3( 0, in + 3 * i );\n"
+        " int3 i0 = vload3( 0, in2 + 3 * i );\n"
+        " f0 = ",
+        name,
+        "( f0, i0 );\n"
+        " vstore3( f0, 0, out + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " float3 f0;\n"
+        " int3 i0;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+        " break;\n"
+        " case 0:\n"
+        " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+        " break;\n"
+        " }\n"
+        " f0 = ",
+        name,
+        "( f0, i0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = f0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = f0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
+                       relaxedMode);
+}
+
+// Arguments handed to BuildKernelFn through the thread pool.
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_uint kernel_count; // number of kernels to create per vector size
+    KernelMatrix &kernels; // destination, indexed [vector size][kernel]
+    cl_program *programs; // destination programs, indexed by vector size
+    const char *nameInCode; // function name as spelled in kernel source
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+// ThreadPool job: build the kernels for vector-size index
+// info->offset + job_id. `p` points to a BuildKernelInfo.
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernel_count,
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
+}
+
+// Thread specific data for a worker thread
+struct ThreadInfo
+{
+    cl_mem inBuf; // input buffer for the thread (float operand)
+    cl_mem inBuf2; // input buffer for the thread (int operand)
+    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+    float maxError; // max error value. Init to 0.
+    double
+        maxErrorValue; // position of the max error value (param 1). Init to 0.
+    cl_int maxErrorValue2; // position of the max error value (param 2). Init
+                           // to 0.
+    MTdata d; // per-thread random number generator state
+    cl_command_queue tQueue; // per thread command queue to improve performance
+};
+
+// State shared by all Test jobs of one function under test.
+struct TestInfo
+{
+    size_t subBufferSize; // Size of the sub-buffer in elements
+    const Func *f; // A pointer to the function info
+    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs
+    cl_uint step; // step between each chunk and the next.
+    cl_uint scale; // stride between individual test values
+    float ulps; // max_allowed ulps
+    int ftz; // non-zero if running in flush to zero mode
+
+    // no special values
+};
+
+// A table of more difficult cases to get right.
+// The second half mirrors the first half with the sign flipped.
+const float specialValues[] = {
+    -NAN,
+    -INFINITY,
+    -FLT_MAX,
+    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
+    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
+    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
+    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
+    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
+    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
+    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
+    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
+    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
+    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
+    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
+    -1000.f,
+    -100.f,
+    -4.0f,
+    -3.5f,
+    -3.0f,
+    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
+    -2.5f,
+    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
+    -2.0f,
+    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
+    -1.5f,
+    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
+    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
+    -1.0f,
+    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
+    -0.5f,
+    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
+    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
+    -0.25f,
+    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
+    -FLT_MIN,
+    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
+    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
+    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
+    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
+    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
+    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
+    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
+    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
+    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
+    -0.0f,
+
+    +NAN,
+    +INFINITY,
+    +FLT_MAX,
+    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
+    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
+    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
+    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
+    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
+    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
+    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
+    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
+    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
+    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
+    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
+    +1000.f,
+    +100.f,
+    +4.0f,
+    +3.5f,
+    +3.0f,
+    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
+    2.5f,
+    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
+    +2.0f,
+    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
+    1.5f,
+    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
+    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
+    +1.0f,
+    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
+    +0.5f,
+    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
+    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
+    +0.25f,
+    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
+    +FLT_MIN,
+    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
+    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
+    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
+    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
+    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
+    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
+    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
+    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
+    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
+    +0.0f,
+};
+
+constexpr size_t specialValuesCount =
+    sizeof(specialValues) / sizeof(specialValues[0]);
+
+// Integer operands paired with every entry of specialValues: small values,
+// values around 127, and large magnitudes, each with its negation.
+const int specialValuesInt[] = {
+    0,           1,           2,           3,          126,        127,
+    128,         0x02000001,  0x04000001,  1465264071, 1488522147, -1,
+    -2,          -3,          -126,        -127,       -128,       -0x02000001,
+    -0x04000001, -1465264071, -1488522147,
+};
+
+constexpr size_t specialValuesIntCount =
+    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
+
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_float);
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    float ulps = job->ulps;
+    MTdata d = tinfo->d;
+    cl_int error;
+    const char *name = job->f->name;
+    cl_uint *t = 0;
+    cl_float *r = 0;
+    cl_float *s = 0;
+    cl_int *s2 = 0;
+
+    // start the map of the output arrays
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_uint *out[VECTOR_SIZE_COUNT];
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+            buffer_size, 0, NULL, e + j, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + size_t idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesIntCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + float *fp = (float *)p; + cl_int *ip2 = (cl_int *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + fp[idx] = specialValues[x]; + ip2[idx] = specialValuesInt[y]; + ++x; + if (x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesIntCount) break; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int32(d); + p2[idx] = genrand_int32(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! 
err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (cl_int *)gIn2 + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)func.f_fi(s[j], s2[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? 
CL_FALSE : CL_TRUE; + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_uint *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float test = ((float *)q)[j]; + double correct = func.f_fi(s[j], s2[j]); + float err = Ulp_Error(test, correct); + int fail = !(fabsf(err) <= ulps); + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsFloatResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + double correct2, correct3; + float err2, err3; + correct2 = func.f_fi(0.0, s2[j]); + correct3 = func.f_fi(-0.0, s2[j]); + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if (fail) + { + vlog_error( + "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: " + "*%a (0x%8.8x) vs. 
%a (0x%8.8x) at index: %zu\n", + name, sizeNames[k], err, s[j], ((uint32_t *)s)[j], + s2[j], r[j], ((uint32_t *)r)[j], test, + ((cl_uint *)&test)[0], j); + error = -1; + goto exit; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region2.origin, region2.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp new file mode 100644 index 0000000000..c407fdaaf1 --- /dev/null +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -0,0 +1,777 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in1, __global double", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = in1[i] ", + operator_symbol, + " in2[i];\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " d0 = d0 ", + operator_symbol, + " d1;\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " double3 d0;\n" + " double3 d1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = d0 ", + operator_symbol, + " d1;\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *operator_symbol; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->operator_symbol, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double + maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init + // to 0. 
+ MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if the test is being run in relaxed mode, false + // otherwise. + + // no special fields +}; + +// A table of more difficult cases to get right +const double specialValues[] = { + -NAN, + -INFINITY, + -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), + -1000., + -100., + -4.0, + -3.5, + -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), + -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), + -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), + -1.5, +
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), + -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), + -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, + + +NAN, + +INFINITY, + +DBL_MAX, + MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), 
+ MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), + +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), + +2.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), + +1.0, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, + MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), +
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), + +0.0, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_double); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + float ulps = job->ulps; + dptr func = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_int error; + const char *name = job->f->name; + cl_ulong *t; + cl_double *r; + cl_double *s; + cl_double *s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_ulong *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + cl_double *fp = (cl_double *)p; + cl_double *fp2 = (cl_double *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + fp[idx] = specialValues[x]; + fp2[idx] = specialValues[y]; + if (++x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int64(d); + p2[idx] = genrand_int64(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! 
err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_ff(s[j], s2[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? 
CL_FALSE : CL_TRUE; + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_ulong *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + cl_double test = ((cl_double *)q)[j]; + long double correct = func.f_ff(s[j], s2[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); + int fail = !(fabsf(err) <= ulps); + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + + // retry per section 6.5.3.3 + if (IsDoubleSubnormal(s[j])) + { + long double correct2 = func.f_ff(0.0, s2[j]); + long double correct3 = func.f_ff(-0.0, s2[j]); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with both args as zero + if (IsDoubleSubnormal(s2[j])) + { + correct2 = func.f_ff(0.0, 0.0); + correct3 = func.f_ff(-0.0, 0.0); + long double correct4 = func.f_ff(0.0, -0.0); + long double correct5 = func.f_ff(-0.0, -0.0); + err2 = Bruteforce_Ulp_Error_Double(test, correct2); + err3 = Bruteforce_Ulp_Error_Double(test, correct3); + float err4 = + Bruteforce_Ulp_Error_Double(test, correct4); + float err5 = + 
Bruteforce_Ulp_Error_Double(test, correct5); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps)) + && (!(fabsf(err4) <= ulps)) + && (!(fabsf(err5) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps) + || IsDoubleResultSubnormal(correct4, ulps) + || IsDoubleResultSubnormal(correct5, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (IsDoubleSubnormal(s2[j])) + { + long double correct2 = func.f_ff(s[j], 0.0); + long double correct3 = func.f_ff(s[j], -0.0); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if (fail) + { + vlog_error( + "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n", + name, sizeNames[k], err, s[j], s2[j], r[j], test); + error = -1; + goto exit; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == 
test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp new file mode 100644 index 0000000000..7fbb07c280 --- /dev/null +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -0,0 +1,905 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in1, __global float", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = in1[i] ", + operator_symbol, + " in2[i];\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " f0 = f0 ", + operator_symbol, + " f1;\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " float3 f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = f0 ", + operator_symbol, + " f1;\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *operator_symbol; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->operator_symbol, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double + maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init + // to 0. 
+ MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if the test is being run in relaxed mode, false + // otherwise. + + // no special fields +}; + +// A table of more difficult cases to get right +const float specialValues[] = { + -NAN, + -INFINITY, + -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), + MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), + MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), + MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), + MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), + MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), + MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), + MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), + MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), + -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), + -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), + -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), + MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), + -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + 
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), + -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), + MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), + -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), + -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), + MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), + MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), + MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), + -0.0f, + + +NAN, + +INFINITY, + +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), + MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), + MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), + MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), + MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), + MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), + MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), + MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), + MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), + 2.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), + +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), + 1.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), + +1.0f, + MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), + +0.5f, + MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + 
MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), + +0.25f, + MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), + +FLT_MIN, + MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), + MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), + MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), + MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), + +0.0f, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_float); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + fptr func = job->f->func; + int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; + float ulps = getAllowedUlpError(job->f, relaxedMode); + MTdata d = tinfo->d; + cl_int error; + std::vector overflow(buffer_elements, false); + const char *name = job->f->name; + cl_uint *t = 0; + cl_float *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; + RoundingMode oldRoundMode; + + if (relaxedMode) + { + func = job->f->rfunc; + } + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_uint *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { + // Insert special values + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + p[idx] = ((cl_uint *)specialValues)[x]; + p2[idx] = ((cl_uint *)specialValues)[y]; + ++x; + if (x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + if (relaxedMode && strcmp(name, "divide") == 0) + { + cl_uint pj = p[idx] & 0x7fffffff; + cl_uint p2j = p2[idx] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int32(d); + p2[idx] = genrand_int32(d); + + if (relaxedMode && strcmp(name, "divide") == 0) + { + cl_uint pj = p[idx] & 0x7fffffff; + cl_uint p2j = p2[idx] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; + } + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! 
err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) + { + return CL_SUCCESS; + } + + // Calculate the correctly rounded 
reference result + FPU_mode_type oldMode; + memset(&oldMode, 0, sizeof(oldMode)); + if (ftz) ForceFTZ(&oldMode); + + // Set the rounding mode to match the device + oldRoundMode = kRoundToNearestEven; + if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); + + // Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + if (gInfNanSupport) + { + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)func.f_ff(s[j], s2[j]); + } + else + { + for (size_t j = 0; j < buffer_elements; j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float)func.f_ff(s[j], s2[j]); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + + if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); + + if (ftz) RestoreFPState(&oldMode); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_uint *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float test = ((float *)q)[j]; + double correct = func.f_ff(s[j], s2[j]); + + // Per section 10 paragraph 6, accept any result if an input or + // output is a infinity or NaN or overflow + if (!gInfNanSupport) + { + // Note: no double rounding here. 
Reference functions + // calculate in single precision. + if (overflow[j] || IsFloatInfinity(correct) + || IsFloatNaN(correct) || IsFloatInfinity(s2[j]) + || IsFloatNaN(s2[j]) || IsFloatInfinity(s[j]) + || IsFloatNaN(s[j])) + continue; + } + + // Per section 10 paragraph 6, accept embedded devices always + // returning positive 0.0. + if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) + continue; + + float err = Ulp_Error(test, correct); + float errB = Ulp_Error(test, (float)correct); + + int fail = + ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); + if (fabsf(errB) < fabsf(err)) err = errB; + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsFloatResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + double correct2, correct3; + float err2, err3; + + if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff(0.0, s2[j]); + correct3 = func.f_ff(-0.0, s2[j]); + + // Per section 10 paragraph 6, accept any result if an + // input or output is a infinity or NaN or overflow + if (!gInfNanSupport) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with both args as zero + if (IsFloatSubnormal(s2[j])) + { + double correct4, correct5; + float err4, err5; + + if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff(0.0, 0.0); + correct3 = func.f_ff(-0.0, 0.0); + correct4 = func.f_ff(0.0, -0.0); + correct5 = func.f_ff(-0.0, -0.0); + + // Per section 10 paragraph 6, accept any result if + // an input or output is a infinity or NaN or + // overflow + if (!gInfNanSupport) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3) + || IsFloatInfinity(correct4) + || IsFloatNaN(correct4) + || IsFloatInfinity(correct5) + || IsFloatNaN(correct5)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + err4 = Ulp_Error(test, correct4); + err5 = Ulp_Error(test, correct5); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps)) + && (!(fabsf(err4) <= ulps)) + && (!(fabsf(err5) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps) + || IsFloatResultSubnormal(correct4, ulps) + || IsFloatResultSubnormal(correct5, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (IsFloatSubnormal(s2[j])) + { + double correct2, correct3; + float err2, err3; + + if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff(s[j], 0.0); + correct3 = func.f_ff(s[j], -0.0); + + // Per section 10 paragraph 6, accept any result if an + // input or output is a infinity or NaN or overflow + if (!gInfNanSupport) + { + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (overflow[j] || IsFloatInfinity(correct) + || IsFloatNaN(correct) + || IsFloatInfinity(correct2) + || IsFloatNaN(correct2)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsFloatResultSubnormal(correct2, ulps) + || IsFloatResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + + + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if (fail) + { + vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a " + "vs. %a (0x%8.8x) at index: %d\n", + name, sizeNames[k], err, s[j], s2[j], r[j], test, + ((cl_uint *)&test)[0], j); + error = -1; + goto exit; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp deleted file mode 100644 index c5577b9e60..0000000000 --- a/test_conformance/math_brute_force/binary_two_results_i.cpp +++ /dev/null @@ -1,1141 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include -#include "FunctionList.h" - -#define PARALLEL_REFERENCE - -int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _binary_two_results_i = { "binary_two_results_i", - TestFunc_FloatI_Float_Float, - TestFunc_DoubleI_Double_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], out2 + i );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in, __global float* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = 
vload3( 0, in2 + 3 * i );\n" - " int3 i0 = 0xdeaddead;\n" - " f0 = ", name, "( f0, f1, &i0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " vstore3( i0, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0, f1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " int3 i0 = 0xdeaddead;\n" - " f0 = ", name, "( f0, f1, &i0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " out2[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " out2[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], out2 + i );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", 
sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in, __global double* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " int3 i0 = 0xdeaddead;\n" - " d0 = ", name, "( d0, d1, &i0 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " vstore3( i0, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0, d1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " int3 i0 = 0xdeaddead;\n" - " d0 = ", name, "( d0, d1, &i0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " out2[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " out2[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -#if defined PARALLEL_REFERENCE -typedef struct ComputeReferenceInfoF_ -{ - const float *x; - const float *y; - float *r; - int *i; - double (*f_ffpI)(double, double, int*); - cl_uint lim; - cl_uint count; -} ComputeReferenceInfoF; - -typedef struct ComputeReferenceInfoD_ -{ - const double *x; - const double *y; - double *r; - int *i; - long double (*f_ffpI)(long double, long double, int*); - cl_uint lim; - cl_uint count; -} ComputeReferenceInfoD; - -static cl_int -ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) -{ - ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo; - cl_uint lim = cri->lim; - cl_uint count = cri->count; - cl_uint off = jid * count; - const float *x = cri->x + off; - const float *y = cri->y + off; - float *r = cri->r + off; - int *i = cri->i + off; - double (*f)(double, double, int *) = cri->f_ffpI; - cl_uint j; - - if (off + count > lim) - count = lim - off; - - for (j = 0; j < count; ++j) - r[j] = (float)f((double)x[j], (double)y[j], i + j); - - return CL_SUCCESS; -} - -static cl_int -ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) -{ - ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo; - cl_uint lim = cri->lim; - cl_uint count = cri->count; - 
cl_uint off = jid * count; - const double *x = cri->x + off; - const double *y = cri->y + off; - double *r = cri->r + off; - int *i = cri->i + off; - long double (*f)(long double, long double, int *) = cri->f_ffpI; - cl_uint j; - - if (off + count > lim) - count = lim - off; - - Force64BitFPUPrecision(); - - for (j = 0; j < count; ++j) - r[j] = (double)f((long double)x[j], (long double)y[j], i + j); - - return CL_SUCCESS; -} - -#endif - -int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - float float_ulps; - int64_t maxError2 = 0; - int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal = 0.0f; - float maxErrorVal2 = 0.0f; - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( float ); - -#if defined PARALLEL_REFERENCE - cl_uint threadCount = GetThreadCount(); -#endif - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - if(gWimpyMode ){ - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - if( gIsEmbedded ) - float_ulps = f->float_embedded_ulps; - else - float_ulps = f->float_ulps; - - int testingRemquo = !strcmp(f->name, "remquo"); - - // Init the kernels - { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - cl_uint *p = (cl_uint *)gIn; - cl_uint *p2 = (cl_uint *)gIn2; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] 
= genrand_int32(d); - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - 
vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - // Calculate the correctly rounded reference result - float *s = (float *)gIn; - float *s2 = (float *)gIn2; - -#if defined PARALLEL_REFERENCE - if (threadCount > 1) { - ComputeReferenceInfoF cri; - cri.x = s; - cri.y = s2; - cri.r = (float *)gOut_Ref; - cri.i = (int *)gOut_Ref2; - cri.f_ffpI = f->func.f_ffpI; - cri.lim = bufferSize / sizeof( float ); - cri.count = (cri.lim + threadCount - 1) / threadCount; - ThreadPool_Do(ReferenceF, threadCount, &cri); - } else { -#endif - float *r = (float *)gOut_Ref; - int *r2 = (int *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = (float) f->func.f_ffpI( s[j], s2[j], r2+j ); -#if defined PARALLEL_REFERENCE - } -#endif - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - int32_t *t2 = (int32_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)gOut[k]; - int32_t *q2 = (int32_t *)gOut2[k]; - - // Check for exact match to correctly rounded result - if (t[j] == q[j] && t2[j] == q2[j]) - continue; - - // Check for paired NaNs - if ((t[j] & 0x7fffffff) > 0x7f800000 && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j]) - continue; - - // if( t[j] != q[j] || t2[j] != q2[j] ) - { - float test = ((float*) q)[j]; - int correct2 = INT_MIN; - 
double correct = f->func.f_ffpI( s[j], s2[j], &correct2 ); - float err = Ulp_Error( test, correct ); - int64_t iErr; - - // in case of remquo, we only care about the sign and last seven bits of - // integer as per the spec. - if(testingRemquo) - iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f); - else - iErr = (long long) q2[j] - (long long) correct2; - - //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects - //to say what is returned in iptr or leaves it undefined or implementation defined. - int iptrUndefined = fabs(((float*) gIn)[j]) == INFINITY || - ((float*) gIn2)[j] == 0.0f || - isnan(((float*) gIn2)[j]) || - isnan(((float*) gIn)[j]); - if(iptrUndefined) - iErr = 0; - - int fail = ! (fabsf(err) <= float_ulps && iErr == 0 ); - if( ftz && fail ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, float_ulps ) ) - { - fail = fail && ! ( test == 0.0f && iErr == 0 ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - int correct3i, correct4i; - double correct3 = f->func.f_ffpI( 0.0, s2[j], &correct3i ); - double correct4 = f->func.f_ffpI( -0.0, s2[j], &correct4i ); - float err2 = Ulp_Error( test, correct3 ); - float err3 = Ulp_Error( test, correct4 ); - int64_t iErr3 = (long long) q2[j] - (long long) correct3i; - int64_t iErr4 = (long long) q2[j] - (long long) correct4i; - fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); - if( ! 
fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsFloatSubnormal( s2[j] ) ) - { - int correct7i, correct8i; - correct3 = f->func.f_ffpI( 0.0, 0.0, &correct3i ); - correct4 = f->func.f_ffpI( -0.0, 0.0, &correct4i ); - double correct7 = f->func.f_ffpI( 0.0, -0.0, &correct7i ); - double correct8 = f->func.f_ffpI( -0.0, -0.0, &correct8i ); - err2 = Ulp_Error( test, correct3 ); - err3 = Ulp_Error( test, correct4 ); - float err4 = Ulp_Error( test, correct7 ); - float err5 = Ulp_Error( test, correct8 ); - iErr3 = (long long) q2[j] - (long long) correct3i; - iErr4 = (long long) q2[j] - (long long) correct4i; - int64_t iErr7 = (long long) q2[j] - (long long) correct7i; - int64_t iErr8 = (long long) q2[j] - (long long) correct8i; - fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)) && - (!(fabsf(err4) <= float_ulps && iErr7 == 0)) && (!(fabsf(err5) <= float_ulps && iErr8 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - if( llabs(iErr7) < llabs( iErr ) ) - iErr = iErr7; - if( llabs(iErr8) < llabs( iErr ) ) - iErr = iErr8; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps ) || - IsFloatResultSubnormal(correct7, float_ulps ) || IsFloatResultSubnormal(correct8, float_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0)); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( IsFloatSubnormal( s2[j] ) ) - { - int correct3i, correct4i; - double correct3 = f->func.f_ffpI( s[j], 0.0, &correct3i ); - double correct4 = f->func.f_ffpI( s[j], -0.0, &correct4i ); - float err2 = Ulp_Error( test, correct3 ); - float err3 = Ulp_Error( test, correct4 ); - int64_t iErr3 = (long long) q2[j] - (long long) correct3i; - int64_t iErr4 = (long long) q2[j] - (long long) correct4i; - fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); - if( ! fail ) - err = 0.0f; - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( llabs(iErr) > maxError2 ) - { - maxError2 = llabs(iErr ); - maxErrorVal2 = s[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} ({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, 0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, - ((float*) gIn)[j], ((float*) gIn2)[j], - ((cl_uint*) gIn)[j], ((cl_uint*) gIn2)[j], - ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], - ((cl_uint*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j], - test, q2[j], - ((cl_uint*)&test)[0], ((cl_uint*) q2)[j] ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - 
if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int64_t maxError2 = 0; - int ftz = f->ftz || gForceFTZ; - double maxErrorVal = 0.0f; - double maxErrorVal2 = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( double ); - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - if(gWimpyMode ){ - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - -#if defined PARALLEL_REFERENCE - cl_uint threadCount = GetThreadCount(); -#endif - - Force64BitFPUPrecision(); - - int testingRemquo = !strcmp(f->name, "remquo"); - - // Init the kernels - { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = 
clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *s = (double *)gIn; - double *s2 = (double *)gIn2; - -#if defined PARALLEL_REFERENCE - if (threadCount > 1) { - ComputeReferenceInfoD cri; - cri.x = s; - cri.y = s2; - cri.r = (double *)gOut_Ref; - cri.i = (int *)gOut_Ref2; - cri.f_ffpI = f->dfunc.f_ffpI; - cri.lim = bufferSize / sizeof( double ); - cri.count = (cri.lim + threadCount - 1) / threadCount; - ThreadPool_Do(ReferenceD, threadCount, &cri); - } else { -#endif - double *r = (double *)gOut_Ref; - int *r2 = (int *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - r[j] = (double) f->dfunc.f_ffpI( s[j], s2[j], r2+j ); -#if defined PARALLEL_REFERENCE - } -#endif - - // Read the data back - for( j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint64_t *t = (uint64_t *)gOut_Ref; - int32_t *t2 = (int32_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = (uint64_t *)gOut[k]; - int32_t *q2 = (int32_t *)gOut2[k]; - - // Check for exact match to correctly rounded result - if (t[j] == q[j] && t2[j] == q2[j]) - continue; - - // Check for paired NaNs - if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL && - (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL && - t2[j] == q2[j]) - continue; - - // if( t[j] != q[j] || t2[j] != q2[j] ) - { - double test = ((double*) q)[j]; - int correct2 = INT_MIN; - long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int64_t iErr; - - // in case of remquo, we only care about the sign and last seven bits of - // integer as per the spec. - if(testingRemquo) - iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f); - else - iErr = (long long) q2[j] - (long long) correct2; - - //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects - //to say what is returned in iptr or leaves it undefined or implementation defined. - int iptrUndefined = fabs(((double*) gIn)[j]) == INFINITY || - ((double*) gIn2)[j] == 0.0 || - isnan(((double*) gIn2)[j]) || - isnan(((double*) gIn)[j]); - if(iptrUndefined) - iErr = 0; - - int fail = ! 
(fabsf(err) <= f->double_ulps && iErr == 0 ); - if( ftz && fail ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && iErr == 0 ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - int correct3i, correct4i; - long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i ); - long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); - int64_t iErr3 = (long long) q2[j] - (long long) correct3i; - int64_t iErr4 = (long long) q2[j] - (long long) correct4i; - fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); - if( ! 
fail ) - err = 0.0f; - } - - //try with both args as zero - if( IsDoubleSubnormal( s2[j] ) ) - { - int correct7i, correct8i; - correct3 = f->dfunc.f_ffpI( 0.0, 0.0, &correct3i ); - correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i ); - long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i ); - long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i ); - err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct7 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct8 ); - iErr3 = (long long) q2[j] - (long long) correct3i; - iErr4 = (long long) q2[j] - (long long) correct4i; - int64_t iErr7 = (long long) q2[j] - (long long) correct7i; - int64_t iErr8 = (long long) q2[j] - (long long) correct8i; - fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)) && - (!(fabsf(err4) <= f->double_ulps && iErr7 == 0)) && (!(fabsf(err5) <= f->double_ulps && iErr8 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - if( llabs(iErr7) < llabs( iErr ) ) - iErr = iErr7; - if( llabs(iErr8) < llabs( iErr ) ) - iErr = iErr8; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps ) || - IsDoubleResultSubnormal( correct7, f->double_ulps ) || IsDoubleResultSubnormal( correct8, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0)); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( IsDoubleSubnormal( s2[j] ) ) - { - int correct3i, correct4i; - long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i ); - long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); - int64_t iErr3 = (long long) q2[j] - (long long) correct3i; - int64_t iErr4 = (long long) q2[j] - (long long) correct4i; - fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( llabs(iErr3) < llabs( iErr ) ) - iErr = iErr3; - if( llabs(iErr4) < llabs( iErr ) ) - iErr = iErr4; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); - if( ! fail ) - err = 0.0f; - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( llabs(iErr) > maxError2 ) - { - maxError2 = llabs(iErr ); - maxErrorVal2 = s[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, %.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, %d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ 0x%16.16llx, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, - ((double*) gIn)[j], ((double*) gIn2)[j], - ((cl_ulong*) gIn)[j], ((cl_ulong*) gIn2)[j], - ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], - ((cl_ulong*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j], - test, q2[j], - ((cl_ulong*) q)[j], ((cl_uint*) q2)[j]); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32( genrand_int32(d) ); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - 
sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - - - diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp new file mode 100644 index 0000000000..43dc1d304e --- /dev/null +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -0,0 +1,584 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <climits>
+#include <cstring>
+
+namespace {
+
+// Assemble and build the OpenCL C kernel that applies the builtin `name`
+// (two double inputs, one double result, one int result written through a
+// pointer) for the vector width selected by index `vectorSize`.
+//
+// Two source templates are used: a generic one that indexes the buffers
+// directly, and a dedicated one for 3-element vectors that goes through
+// vload3/vstore3 and handles the last work-item's leftover elements
+// separately.
+//
+// On return *k and *p receive the kernel and program created by MakeKernel;
+// the return value is whatever MakeKernel reports.
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+                bool relaxedMode)
+{
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global double",
+                        sizeNames[vectorSize],
+                        "* out, __global int",
+                        sizeNames[vectorSize],
+                        "* out2, __global double",
+                        sizeNames[vectorSize],
+                        "* in1, __global double",
+                        sizeNames[vectorSize],
+                        "* in2 )\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in1[i], in2[i], out2 + i );\n"
+                        "}\n" };
+
+    const char *c3[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global double* out, __global int* out2, __global double* in, "
+        "__global double* in2)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " double3 d0 = vload3( 0, in + 3 * i );\n"
+        " double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        " int3 i0 = 0xdeaddead;\n"
+        " d0 = ",
+        name,
+        "( d0, d1, &i0 );\n"
+        " vstore3( d0, 0, out + 3*i );\n"
+        " vstore3( i0, 0, out2 + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " double3 d0;\n"
+        " double3 d1;\n"
+        " int3 i0 = 0xdeaddead;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        " break;\n"
+        " case 0:\n"
+        " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        " break;\n"
+        " }\n"
+        " d0 = ",
+        name,
+        "( d0, d1, &i0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = d0.y; \n"
+        " out2[3*i+1] = i0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = d0.x; \n"
+        " out2[3*i] = i0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    // 3-element vectors need the vload3/vstore3 variant.
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+}
+
+// Arguments shared by all BuildKernelFn jobs.
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_kernel *kernels;
+    cl_program *programs;
+    const char *nameInCode;
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+// ThreadPool_Do callback: job `job_id` builds the kernel for vector size
+// index `offset + job_id` and stores it in the matching array slots.
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernels + i,
+                       info->programs + i, info->relaxedMode);
+}
+
+// Arguments shared by all ReferenceD jobs.
+struct ComputeReferenceInfoD
+{
+    const double *x; // first input array
+    const double *y; // second input array
+    double *r; // reference floating-point results
+    int *i; // reference integer results
+    long double (*f_ffpI)(long double, long double, int *);
+    cl_uint lim; // total number of elements
+    cl_uint count; // number of elements handled per job
+};
+
+// ThreadPool_Do callback: job `jid` computes the host reference results for
+// elements [jid * count, min(jid * count + count, lim)).
+cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
+{
+    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+
+    Force64BitFPUPrecision();
+
+    // `count` is rounded up by the caller, so trailing jobs can start at or
+    // past the end of the data. Bail out before `lim - off` can wrap.
+    if (off >= lim) return CL_SUCCESS;
+    if (off + count > lim) count = lim - off;
+
+    const double *x = cri->x + off;
+    const double *y = cri->y + off;
+    double *r = cri->r + off;
+    int *i = cri->i + off;
+    long double (*f)(long double, long double, int *) = cri->f_ffpI;
+
+    for (cl_uint j = 0; j < count; ++j)
+        r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
+
+    return CL_SUCCESS;
+}
+
+} // anonymous namespace
+
+// Brute-force test for double-precision builtins of the form
+//     double f(double, double, int *)
+//
+// For each pass the two input buffers are filled with random values, the
+// kernel for every enabled vector size is run, a host reference is computed
+// with f->dfunc.f_ffpI, and both the floating-point result (within
+// f->double_ulps) and the integer result are compared. When flush-to-zero
+// is in effect, failing results are retried against references computed with
+// subnormal inputs replaced by +/-0.
+//
+// Returns 0 on success, otherwise the OpenCL error code or -1 on a
+// verification failure. Kernels and programs are released on every path that
+// is reached after they were built successfully.
+int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    cl_program programs[VECTOR_SIZE_COUNT];
+    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
+
+    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+    cl_uint threadCount = GetThreadCount();
+
+    Force64BitFPUPrecision();
+
+    int testingRemquo = !strcmp(f->name, "remquo");
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+                                       f->nameInCode, relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+
+        // From here on the kernels exist, so every failure must leave
+        // through `exit` to release them.
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            goto exit;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+            if ((error =
+                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
+                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
+                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
+                                              NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
+                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
+                                        &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
+                                        &gOutBuffer2[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
+                                        &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
+                                        &gInBuffer2)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+
+            if ((error =
+                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
+                                            &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Calculate the correctly rounded reference result
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+
+        if (threadCount > 1)
+        {
+            ComputeReferenceInfoD cri;
+            cri.x = s;
+            cri.y = s2;
+            cri.r = (double *)gOut_Ref;
+            cri.i = (int *)gOut_Ref2;
+            cri.f_ffpI = f->dfunc.f_ffpI;
+            cri.lim = BUFFER_SIZE / sizeof(double);
+            cri.count = (cri.lim + threadCount - 1) / threadCount;
+            ThreadPool_Do(ReferenceD, threadCount, &cri);
+        }
+        else
+        {
+            double *r = (double *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+                r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
+        }
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                goto exit;
+            }
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray2 failed %d\n", error);
+                goto exit;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint64_t *q = (uint64_t *)gOut[k];
+                int32_t *q2 = (int32_t *)gOut2[k];
+
+                // Check for exact match to correctly rounded result
+                if (t[j] == q[j] && t2[j] == q2[j]) continue;
+
+                // Check for paired NaNs
+                if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
+                    && (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
+                    && t2[j] == q2[j])
+                    continue;
+
+                double test = ((double *)q)[j];
+                int correct2 = INT_MIN;
+                long double correct = f->dfunc.f_ffpI(s[j], s2[j], &correct2);
+                float err = Bruteforce_Ulp_Error_Double(test, correct);
+                int64_t iErr;
+
+                // in case of remquo, we only care about the sign and last
+                // seven bits of integer as per the spec.
+                if (testingRemquo)
+                    iErr = (long long)(q2[j] & 0x0000007f)
+                        - (long long)(correct2 & 0x0000007f);
+                else
+                    iErr = (long long)q2[j] - (long long)correct2;
+
+                // For remquo, if y = 0, x is infinite, or either is NaN
+                // then the standard either neglects to say what is returned
+                // in iptr or leaves it undefined or implementation defined.
+                int iptrUndefined = fabs(((double *)gIn)[j]) == INFINITY
+                    || ((double *)gIn2)[j] == 0.0 || isnan(((double *)gIn2)[j])
+                    || isnan(((double *)gIn)[j]);
+                if (iptrUndefined) iErr = 0;
+
+                int fail = !(fabsf(err) <= f->double_ulps && iErr == 0);
+                if (ftz && fail)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsDoubleResultSubnormal(correct, f->double_ulps))
+                    {
+                        fail = fail && !(test == 0.0f && iErr == 0);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsDoubleSubnormal(s[j]))
+                    {
+                        int correct3i, correct4i;
+                        long double correct3 =
+                            f->dfunc.f_ffpI(0.0, s2[j], &correct3i);
+                        long double correct4 =
+                            f->dfunc.f_ffpI(-0.0, s2[j], &correct4i);
+                        float err2 =
+                            Bruteforce_Ulp_Error_Double(test, correct3);
+                        float err3 =
+                            Bruteforce_Ulp_Error_Double(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= f->double_ulps
+                                      && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        // The subnormal check applies to the two
+                        // floating-point references computed above, not to
+                        // the integer quotient `correct2`.
+                        if (IsDoubleResultSubnormal(correct3, f->double_ulps)
+                            || IsDoubleResultSubnormal(correct4,
+                                                       f->double_ulps))
+                        {
+                            fail = fail
+                                && !(test == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // try with both args as zero
+                        if (IsDoubleSubnormal(s2[j]))
+                        {
+                            int correct7i, correct8i;
+                            correct3 = f->dfunc.f_ffpI(0.0, 0.0, &correct3i);
+                            correct4 = f->dfunc.f_ffpI(-0.0, 0.0, &correct4i);
+                            long double correct7 =
+                                f->dfunc.f_ffpI(0.0, -0.0, &correct7i);
+                            long double correct8 =
+                                f->dfunc.f_ffpI(-0.0, -0.0, &correct8i);
+                            err2 = Bruteforce_Ulp_Error_Double(test, correct3);
+                            err3 = Bruteforce_Ulp_Error_Double(test, correct4);
+                            float err4 =
+                                Bruteforce_Ulp_Error_Double(test, correct7);
+                            float err5 =
+                                Bruteforce_Ulp_Error_Double(test, correct8);
+                            iErr3 = (long long)q2[j] - (long long)correct3i;
+                            iErr4 = (long long)q2[j] - (long long)correct4i;
+                            int64_t iErr7 =
+                                (long long)q2[j] - (long long)correct7i;
+                            int64_t iErr8 =
+                                (long long)q2[j] - (long long)correct8i;
+                            fail = fail
+                                && ((!(fabsf(err2) <= f->double_ulps
+                                       && iErr3 == 0))
+                                    && (!(fabsf(err3) <= f->double_ulps
+                                          && iErr4 == 0))
+                                    && (!(fabsf(err4) <= f->double_ulps
+                                          && iErr7 == 0))
+                                    && (!(fabsf(err5) <= f->double_ulps
+                                          && iErr8 == 0)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                            if (fabsf(err4) < fabsf(err)) err = err4;
+                            if (fabsf(err5) < fabsf(err)) err = err5;
+                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
+                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
+
+                            // retry per section 6.5.3.4
+                            if (IsDoubleResultSubnormal(correct3,
+                                                        f->double_ulps)
+                                || IsDoubleResultSubnormal(correct4,
+                                                           f->double_ulps)
+                                || IsDoubleResultSubnormal(correct7,
+                                                           f->double_ulps)
+                                || IsDoubleResultSubnormal(correct8,
+                                                           f->double_ulps))
+                            {
+                                fail = fail
+                                    && !(test == 0.0f
+                                         && (iErr3 == 0 || iErr4 == 0
+                                             || iErr7 == 0 || iErr8 == 0));
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    else if (IsDoubleSubnormal(s2[j]))
+                    {
+                        int correct3i, correct4i;
+                        long double correct3 =
+                            f->dfunc.f_ffpI(s[j], 0.0, &correct3i);
+                        long double correct4 =
+                            f->dfunc.f_ffpI(s[j], -0.0, &correct4i);
+                        float err2 =
+                            Bruteforce_Ulp_Error_Double(test, correct3);
+                        float err3 =
+                            Bruteforce_Ulp_Error_Double(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= f->double_ulps
+                                      && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        // As above: test the floating-point references, not
+                        // the integer quotient.
+                        if (IsDoubleResultSubnormal(correct3, f->double_ulps)
+                            || IsDoubleResultSubnormal(correct4,
+                                                       f->double_ulps))
+                        {
+                            fail = fail
+                                && !(test == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+                if (fabsf(err) > maxError)
+                {
+                    maxError = fabsf(err);
+                    maxErrorVal = s[j];
+                }
+                if (llabs(iErr) > maxError2)
+                {
+                    maxError2 = llabs(iErr);
+                    maxErrorVal2 = s[j];
+                }
+
+                if (fail)
+                {
+                    vlog_error(
+                        "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
+                        "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
+                        "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
+                        "0x%16.16llx, 0x%8.8x})\n",
+                        f->name, sizeNames[k], err, (long long)iErr,
+                        ((double *)gIn)[j], ((double *)gIn2)[j],
+                        ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j],
+                        ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+                        ((cl_ulong *)gOut_Ref)[j], ((cl_uint *)gOut_Ref2)[j],
+                        test, q2[j], ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                // i and step are 64-bit; cast so the arguments match the
+                // conversion specifiers on every platform.
+                vlog("base:%14llu step:%10llu bufferSize:%10zu \n",
+                     (unsigned long long)i, (unsigned long long)step,
+                     (size_t)BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, (long long)maxError2,
+             maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+exit:
+    // Release
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
new file mode 100644
index 0000000000..83ceeaabf6
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -0,0 +1,569 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <climits>
+#include <cstring>
+
+namespace {
+
+// Assemble and build the OpenCL C kernel that applies the builtin `name`
+// (two float inputs, one float result, one int result written through a
+// pointer) for the vector width selected by index `vectorSize`.
+//
+// Two source templates are used: a generic one that indexes the buffers
+// directly, and a dedicated one for 3-element vectors that goes through
+// vload3/vstore3 and handles the last work-item's leftover elements
+// separately.
+//
+// On return *k and *p receive the kernel and program created by MakeKernel;
+// the return value is whatever MakeKernel reports.
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+                bool relaxedMode)
+{
+    const char *c[] = { "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global float",
+                        sizeNames[vectorSize],
+                        "* out, __global int",
+                        sizeNames[vectorSize],
+                        "* out2, __global float",
+                        sizeNames[vectorSize],
+                        "* in1, __global float",
+                        sizeNames[vectorSize],
+                        "* in2 )\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in1[i], in2[i], out2 + i );\n"
+                        "}\n" };
+
+    const char *c3[] = {
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global float* out, __global int* out2, __global float* in, "
+        "__global float* in2)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " float3 f0 = vload3( 0, in + 3 * i );\n"
+        " float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        " int3 i0 = 0xdeaddead;\n"
+        " f0 = ",
+        name,
+        "( f0, f1, &i0 );\n"
+        " vstore3( f0, 0, out + 3*i );\n"
+        " vstore3( i0, 0, out2 + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " float3 f0;\n"
+        " float3 f1;\n"
+        " int3 i0 = 0xdeaddead;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        " f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        " break;\n"
+        " case 0:\n"
+        " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        " break;\n"
+        " }\n"
+        " f0 = ",
+        name,
+        "( f0, f1, &i0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = f0.y; \n"
+        " out2[3*i+1] = i0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = f0.x; \n"
+        " out2[3*i] = i0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    // 3-element vectors need the vload3/vstore3 variant.
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+}
+
+// Arguments shared by all BuildKernelFn jobs.
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_kernel *kernels;
+    cl_program *programs;
+    const char *nameInCode;
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+// ThreadPool_Do callback: job `job_id` builds the kernel for vector size
+// index `offset + job_id` and stores it in the matching array slots.
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernels + i,
+                       info->programs + i, info->relaxedMode);
+}
+
+// Arguments shared by all ReferenceF jobs.
+struct ComputeReferenceInfoF
+{
+    const float *x; // first input array
+    const float *y; // second input array
+    float *r; // reference floating-point results
+    int *i; // reference integer results
+    double (*f_ffpI)(double, double, int *);
+    cl_uint lim; // total number of elements
+    cl_uint count; // number of elements handled per job
+};
+
+// ThreadPool_Do callback: job `jid` computes the host reference results for
+// elements [jid * count, min(jid * count + count, lim)).
+cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+{
+    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+
+    // `count` is rounded up by the caller, so trailing jobs can start at or
+    // past the end of the data. Bail out before `lim - off` can wrap.
+    if (off >= lim) return CL_SUCCESS;
+    if (off + count > lim) count = lim - off;
+
+    const float *x = cri->x + off;
+    const float *y = cri->y + off;
+    float *r = cri->r + off;
+    int *i = cri->i + off;
+    double (*f)(double, double, int *) = cri->f_ffpI;
+
+    for (cl_uint j = 0; j < count; ++j)
+        r[j] = (float)f((double)x[j], (double)y[j], i + j);
+
+    return CL_SUCCESS;
+}
+
+} // anonymous namespace
+
+// Brute-force test for single-precision builtins of the form
+//     float f(float, float, int *)
+//
+// For each pass the two input buffers are filled with random bit patterns,
+// the kernel for every enabled vector size is run, a host reference is
+// computed with f->func.f_ffpI, and both the floating-point result (within
+// the embedded or full-profile ulp limit) and the integer result are
+// compared. When flush-to-zero is in effect, failing results are retried
+// against references computed with subnormal inputs replaced by +/-0.
+//
+// Returns 0 on success, otherwise the OpenCL error code or -1 on a
+// verification failure. Kernels and programs are released on every path that
+// is reached after they were built successfully.
+int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+
+    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+    cl_program programs[VECTOR_SIZE_COUNT];
+    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    int64_t maxError2 = 0;
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
+
+    cl_uint threadCount = GetThreadCount();
+
+    float float_ulps;
+    if (gIsEmbedded)
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    int testingRemquo = !strcmp(f->name, "remquo");
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+                                       f->nameInCode, relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        cl_uint *p2 = (cl_uint *)gIn2;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+        }
+
+        // From here on the kernels exist, so every failure must leave
+        // through `exit` to release them.
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            goto exit;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+            if ((error =
+                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
+                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
+                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
+                                              NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
+                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
+                                        &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
+                                        &gOutBuffer2[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
+                                        &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
+                                        &gInBuffer2)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+
+            if ((error =
+                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
+                                            &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Calculate the correctly rounded reference result
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+
+        if (threadCount > 1)
+        {
+            ComputeReferenceInfoF cri;
+            cri.x = s;
+            cri.y = s2;
+            cri.r = (float *)gOut_Ref;
+            cri.i = (int *)gOut_Ref2;
+            cri.f_ffpI = f->func.f_ffpI;
+            cri.lim = BUFFER_SIZE / sizeof(float);
+            cri.count = (cri.lim + threadCount - 1) / threadCount;
+            ThreadPool_Do(ReferenceF, threadCount, &cri);
+        }
+        else
+        {
+            float *r = (float *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+                r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
+        }
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                goto exit;
+            }
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray2 failed %d\n", error);
+                goto exit;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                int32_t *q2 = (int32_t *)gOut2[k];
+
+                // Check for exact match to correctly rounded result
+                if (t[j] == q[j] && t2[j] == q2[j]) continue;
+
+                // Check for paired NaNs
+                if ((t[j] & 0x7fffffff) > 0x7f800000
+                    && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j])
+                    continue;
+
+                float test = ((float *)q)[j];
+                int correct2 = INT_MIN;
+                double correct = f->func.f_ffpI(s[j], s2[j], &correct2);
+                float err = Ulp_Error(test, correct);
+                int64_t iErr;
+
+                // in case of remquo, we only care about the sign and last
+                // seven bits of integer as per the spec.
+                if (testingRemquo)
+                    iErr = (long long)(q2[j] & 0x0000007f)
+                        - (long long)(correct2 & 0x0000007f);
+                else
+                    iErr = (long long)q2[j] - (long long)correct2;
+
+                // For remquo, if y = 0, x is infinite, or either is NaN
+                // then the standard either neglects to say what is returned
+                // in iptr or leaves it undefined or implementation defined.
+                int iptrUndefined = fabs(((float *)gIn)[j]) == INFINITY
+                    || ((float *)gIn2)[j] == 0.0f || isnan(((float *)gIn2)[j])
+                    || isnan(((float *)gIn)[j]);
+                if (iptrUndefined) iErr = 0;
+
+                int fail = !(fabsf(err) <= float_ulps && iErr == 0);
+                if (ftz && fail)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsFloatResultSubnormal(correct, float_ulps))
+                    {
+                        fail = fail && !(test == 0.0f && iErr == 0);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsFloatSubnormal(s[j]))
+                    {
+                        int correct3i, correct4i;
+                        double correct3 =
+                            f->func.f_ffpI(0.0, s2[j], &correct3i);
+                        double correct4 =
+                            f->func.f_ffpI(-0.0, s2[j], &correct4i);
+                        float err2 = Ulp_Error(test, correct3);
+                        float err3 = Ulp_Error(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= float_ulps
+                                      && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        // The subnormal check applies to the two
+                        // floating-point references computed above, not to
+                        // the integer quotient `correct2`.
+                        if (IsFloatResultSubnormal(correct3, float_ulps)
+                            || IsFloatResultSubnormal(correct4, float_ulps))
+                        {
+                            fail = fail
+                                && !(test == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // try with both args as zero
+                        if (IsFloatSubnormal(s2[j]))
+                        {
+                            int correct7i, correct8i;
+                            correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
+                            correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
+                            double correct7 =
+                                f->func.f_ffpI(0.0, -0.0, &correct7i);
+                            double correct8 =
+                                f->func.f_ffpI(-0.0, -0.0, &correct8i);
+                            err2 = Ulp_Error(test, correct3);
+                            err3 = Ulp_Error(test, correct4);
+                            float err4 = Ulp_Error(test, correct7);
+                            float err5 = Ulp_Error(test, correct8);
+                            iErr3 = (long long)q2[j] - (long long)correct3i;
+                            iErr4 = (long long)q2[j] - (long long)correct4i;
+                            int64_t iErr7 =
+                                (long long)q2[j] - (long long)correct7i;
+                            int64_t iErr8 =
+                                (long long)q2[j] - (long long)correct8i;
+                            fail = fail
+                                && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
+                                    && (!(fabsf(err3) <= float_ulps
+                                          && iErr4 == 0))
+                                    && (!(fabsf(err4) <= float_ulps
+                                          && iErr7 == 0))
+                                    && (!(fabsf(err5) <= float_ulps
+                                          && iErr8 == 0)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                            if (fabsf(err4) < fabsf(err)) err = err4;
+                            if (fabsf(err5) < fabsf(err)) err = err5;
+                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
+                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
+
+                            // retry per section 6.5.3.4
+                            if (IsFloatResultSubnormal(correct3, float_ulps)
+                                || IsFloatResultSubnormal(correct4, float_ulps)
+                                || IsFloatResultSubnormal(correct7, float_ulps)
+                                || IsFloatResultSubnormal(correct8, float_ulps))
+                            {
+                                fail = fail
+                                    && !(test == 0.0f
+                                         && (iErr3 == 0 || iErr4 == 0
+                                             || iErr7 == 0 || iErr8 == 0));
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    else if (IsFloatSubnormal(s2[j]))
+                    {
+                        int correct3i, correct4i;
+                        double correct3 = f->func.f_ffpI(s[j], 0.0, &correct3i);
+                        double correct4 =
+                            f->func.f_ffpI(s[j], -0.0, &correct4i);
+                        float err2 = Ulp_Error(test, correct3);
+                        float err3 = Ulp_Error(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= float_ulps
+                                      && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        // As above: test the floating-point references, not
+                        // the integer quotient.
+                        if (IsFloatResultSubnormal(correct3, float_ulps)
+                            || IsFloatResultSubnormal(correct4, float_ulps))
+                        {
+                            fail = fail
+                                && !(test == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+                if (fabsf(err) > maxError)
+                {
+                    maxError = fabsf(err);
+                    maxErrorVal = s[j];
+                }
+                if (llabs(iErr) > maxError2)
+                {
+                    maxError2 = llabs(iErr);
+                    maxErrorVal2 = s[j];
+                }
+
+                if (fail)
+                {
+                    vlog_error(
+                        "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
+                        "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
+                        "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+                        f->name, sizeNames[k], err, (long long)iErr,
+                        ((float *)gIn)[j], ((float *)gIn2)[j],
+                        ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j],
+                        ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+                        ((cl_uint *)gOut_Ref)[j], ((cl_uint *)gOut_Ref2)[j],
+                        test, q2[j], ((cl_uint *)&test)[0],
+                        ((cl_uint *)q2)[j]);
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                // i and step are 64-bit; cast so the arguments match the
+                // conversion specifiers on every platform.
+                vlog("base:%14llu step:%10llu bufferSize:%10zu \n",
+                     (unsigned long long)i, (unsigned long long)step,
+                     (size_t)BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, (long long)maxError2,
+             maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+exit:
+    // Release
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
diff --git a/test_conformance/clcpp/convert/main.cpp b/test_conformance/math_brute_force/common.h
similarity index 65%
rename from test_conformance/clcpp/convert/main.cpp
rename to test_conformance/math_brute_force/common.h
index 78e3763750..3eafb6de30 100644
--- a/test_conformance/clcpp/convert/main.cpp
+++ b/test_conformance/math_brute_force/common.h
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,13 +13,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "../common.hpp"
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "utility.h"
 
-#include "convert_cast.hpp"
+#include
+#include
 
+// Array of thread-specific kernels for each vector size.
+using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} +#endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/FunctionList.cpp b/test_conformance/math_brute_force/function_list.cpp similarity index 84% rename from test_conformance/math_brute_force/FunctionList.cpp rename to test_conformance/math_brute_force/function_list.cpp index a07fa06974..917362852c 100644 --- a/test_conformance/math_brute_force/FunctionList.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -13,16 +13,18 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "FunctionList.h" + +#include "function_list.h" #include "reference_math.h" +#include "test_functions.h" -#define FTZ_ON 1 +#define FTZ_ON 1 #define FTZ_OFF 0 -#define EXACT 0.0f +#define EXACT 0.0f #define RELAXED_ON 1 #define RELAXED_OFF 0 -#define STRINGIFY( _s) #_s +#define STRINGIFY(_s) #_s // Only use ulps information in spir test #ifdef FUNCTION_LIST_ULPS_ONLY @@ -51,25 +53,25 @@ STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \ _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \ } -#define unaryF NULL -#define i_unaryF NULL -#define unaryF_u NULL -#define macro_unaryF NULL -#define binaryF NULL -#define binaryF_nextafter NULL -#define binaryOperatorF NULL -#define binaryF_i NULL -#define macro_binaryF NULL -#define ternaryF NULL -#define unaryF_two_results NULL -#define unaryF_two_results_i NULL + +#define unaryF NULL +#define i_unaryF NULL +#define unaryF_u NULL +#define macro_unaryF NULL +#define binaryF NULL +#define binaryOperatorF NULL +#define binaryF_i NULL +#define macro_binaryF NULL +#define ternaryF NULL +#define unaryF_two_results NULL +#define unaryF_two_results_i NULL #define binaryF_two_results_i NULL 
-#define mad_function NULL +#define mad_function NULL -#define reference_sqrt NULL -#define reference_sqrtl NULL -#define reference_divide NULL -#define reference_dividel NULL +#define reference_sqrt NULL +#define reference_sqrtl NULL +#define reference_divide NULL +#define reference_dividel NULL #define reference_relaxed_divide NULL #else // FUNCTION_LIST_ULPS_ONLY @@ -102,35 +104,97 @@ _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \ } -extern const vtbl _unary; // float foo( float ) -extern const vtbl _unary_u; // float foo( uint ), double foo( ulong ) -extern const vtbl _i_unary; // int foo( float ) -extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar, { 0, -1 } for vector -extern const vtbl _binary; // float foo( float, float ) -extern const vtbl _binary_nextafter; // float foo( float, float ), special handling for nextafter -extern const vtbl _binary_operator; // float .op. float -extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector -extern const vtbl _binary_i; // float foo( float, int ) -extern const vtbl _ternary; // float foo( float, float, float ) -extern const vtbl _unary_two_results; // float foo( float, float * ) -extern const vtbl _unary_two_results_i; // float foo( float, int * ) -extern const vtbl _binary_two_results_i; // float foo( float, float, int * ) -extern const vtbl _mad_tbl; // float mad( float, float, float ) +static constexpr vtbl _unary = { + "unary", + TestFunc_Float_Float, + TestFunc_Double_Double, +}; + +static constexpr vtbl _i_unary = { + "i_unary", + TestFunc_Int_Float, + TestFunc_Int_Double, +}; + +static constexpr vtbl _unary_u = { + "unary_u", + TestFunc_Float_UInt, + TestFunc_Double_ULong, +}; + +static constexpr vtbl _macro_unary = { + "macro_unary", + TestMacro_Int_Float, + TestMacro_Int_Double, +}; + +static constexpr vtbl _binary = { + "binary", + TestFunc_Float_Float_Float, + TestFunc_Double_Double_Double, +}; + +static 
constexpr vtbl _binary_operator = { + "binaryOperator", + TestFunc_Float_Float_Float_Operator, + TestFunc_Double_Double_Double_Operator, +}; + +static constexpr vtbl _binary_i = { + "binary_i", + TestFunc_Float_Float_Int, + TestFunc_Double_Double_Int, +}; + +static constexpr vtbl _macro_binary = { + "macro_binary", + TestMacro_Int_Float_Float, + TestMacro_Int_Double_Double, +}; + +static constexpr vtbl _ternary = { + "ternary", + TestFunc_Float_Float_Float_Float, + TestFunc_Double_Double_Double_Double, +}; + +static constexpr vtbl _unary_two_results = { + "unary_two_results", + TestFunc_Float2_Float, + TestFunc_Double2_Double, +}; + +static constexpr vtbl _unary_two_results_i = { + "unary_two_results_i", + TestFunc_FloatI_Float, + TestFunc_DoubleI_Double, +}; + +static constexpr vtbl _binary_two_results_i = { + "binary_two_results_i", + TestFunc_FloatI_Float_Float, + TestFunc_DoubleI_Double_Double, +}; + +static constexpr vtbl _mad_tbl = { + "ternary", + TestFunc_mad_Float, + TestFunc_mad_Double, +}; #define unaryF &_unary #define i_unaryF &_i_unary -#define unaryF_u &_unary_u +#define unaryF_u &_unary_u #define macro_unaryF &_macro_unary #define binaryF &_binary -#define binaryF_nextafter &_binary_nextafter #define binaryOperatorF &_binary_operator #define binaryF_i &_binary_i #define macro_binaryF &_macro_binary #define ternaryF &_ternary -#define unaryF_two_results &_unary_two_results -#define unaryF_two_results_i &_unary_two_results_i -#define binaryF_two_results_i &_binary_two_results_i -#define mad_function &_mad_tbl +#define unaryF_two_results &_unary_two_results +#define unaryF_two_results_i &_unary_two_results_i +#define binaryF_two_results_i &_binary_two_results_i +#define mad_function &_mad_tbl #endif // FUNCTION_LIST_ULPS_ONLY @@ -214,7 +278,7 @@ const Func functionList[] = { ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF), ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results), ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u), - ENTRY(nextafter, 0.0f, 0.0f, 
FTZ_OFF, binaryF_nextafter), + ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF), ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f), // in derived mode the ulp error is calculated as // exp2(y*log2(x)) and in non-derived it is the same as @@ -325,4 +389,4 @@ const Func functionList[] = { OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF), }; -const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] ); +const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]); diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h new file mode 100644 index 0000000000..95a2945932 --- /dev/null +++ b/test_conformance/math_brute_force/function_list.h @@ -0,0 +1,98 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef FUNCTION_LIST_H +#define FUNCTION_LIST_H + +#include "harness/compat.h" + +#ifndef WIN32 +#include +#endif + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#include "harness/mt19937.h" + +union fptr { + void *p; + double (*f_f)(double); + double (*f_u)(cl_uint); + int (*i_f)(double); + int (*i_f_f)(float); + float (*f_ff_f)(float, float); + double (*f_ff)(double, double); + int (*i_ff)(double, double); + double (*f_fi)(double, int); + double (*f_fpf)(double, double *); + double (*f_fpI)(double, int *); + double (*f_ffpI)(double, double, int *); + double (*f_fff)(double, double, double); + float (*f_fma)(float, float, float, int); +}; + +union dptr { + void *p; + long double (*f_f)(long double); + long double (*f_u)(cl_ulong); + int (*i_f)(long double); + long double (*f_ff)(long double, long double); + int (*i_ff)(long double, long double); + long double (*f_fi)(long double, int); + long double (*f_fpf)(long double, long double *); + long double (*f_fpI)(long double, int *); + long double (*f_ffpI)(long double, long double, int *); + long double (*f_fff)(long double, long double, long double); +}; + +struct Func; + +struct vtbl +{ + const char *type_name; + int (*TestFunc)(const struct Func *, MTdata, bool); + int (*DoubleTestFunc)( + const struct Func *, MTdata, + bool); // may be NULL if function is single precision only +}; + +struct Func +{ + const char *name; // common name, to be used as an argument in the shell + const char *nameInCode; // name as it appears in the __kernel, usually the + // same as name, but different for multiplication + fptr func; + dptr dfunc; + fptr rfunc; + float float_ulps; + float double_ulps; + float float_embedded_ulps; + float relaxed_error; + float relaxed_embedded_error; + int ftz; + int relaxed; + const vtbl *vtbl_ptr; +}; + + +extern const Func functionList[]; + +extern const size_t functionListCount; + +#endif diff --git a/test_conformance/math_brute_force/i_unary.cpp 
b/test_conformance/math_brute_force/i_unary.cpp deleted file mode 100644 index 379d8e353f..0000000000 --- a/test_conformance/math_brute_force/i_unary.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Int_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _i_unary = { "i_unary", TestFunc_Int_Float, - TestFunc_Int_Double }; - - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = ", name, "( f0 );\n" - " vstore3( 
i0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " int3 i0 = ", name, "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - - const char *c3[] = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global double* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 f0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = ", name, "( f0 );\n" - " vstore3( i0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " double3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (double3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " int3 i0 = ", name, "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ; - size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( float ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( 
i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j * scale; - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - int *r = (int *)gOut_Ref; - float *s = (float *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = f->func.i_f( s[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], 
CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)(gOut[k]); - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - if( ftz && IsFloatSubnormal(s[j])) - { - unsigned int correct0 = f->func.i_f( 0.0 ); - unsigned int correct1 = f->func.i_f( -0.0 ); - if( q[j] == correct0 || q[j] == correct1 ) - continue; - } - - uint32_t err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. %d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] ); - error = -1; - goto exit; - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - vlog( "\n" ); -exit: - RestoreFPState(&oldMode); - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - 
clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - int ftz = f->ftz || gForceFTZ; - size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( cl_double ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32( (uint32_t) i + j * scale ); - } - else - { - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32( (uint32_t) i + j ); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - 
memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - int *r = (int *)gOut_Ref; - double *s = (double *)gIn; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - r[j] = f->dfunc.i_f( s[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)(gOut[k]); - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - if( ftz && IsDoubleSubnormal(s[j])) - { - unsigned int correct0 = f->dfunc.i_f( 0.0 ); - unsigned int correct1 = f->dfunc.i_f( -0.0 ); - if( q[j] == correct0 || 
q[j] == correct1 ) - continue; - } - - uint32_t err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] ); - error = -1; - goto exit; - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - - } - } - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32( genrand_int32(d) ); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - 
if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - vlog( "\n" ); - - -exit: - RestoreFPState(&oldMode); - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp new file mode 100644 index 0000000000..d09e14c128 --- /dev/null +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -0,0 +1,306 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+                bool relaxedMode)
+{
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global int",
+                        sizeNames[vectorSize],
+                        "* out, __global double",
+                        sizeNames[vectorSize],
+                        "* in )\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in[i] );\n"
+                        "}\n" };
+
+    const char *c3[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global int* out, __global double* in)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " double3 f0 = vload3( 0, in + 3 * i );\n"
+        " int3 i0 = ",
+        name,
+        "( f0 );\n"
+        " vstore3( i0, 0, out + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " double3 f0;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " f0 = (double3)( in[3*i], NAN, NAN ); \n"
+        " break;\n"
+        " case 0:\n"
+        " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        " break;\n"
+        " }\n"
+        " int3 i0 = ",
+        name,
+        "( f0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = i0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = i0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+}
+
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_kernel *kernels;
+    cl_program *programs;
+    const char *nameInCode;
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernels + i,
+                       info->programs + i, info->relaxedMode);
+}
+
+} // anonymous namespace
+
+int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    cl_program programs[VECTOR_SIZE_COUNT];
+    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    int ftz = f->ftz || gForceFTZ;
+    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
+    int scale =
+        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
+
+    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+    // The test loop runs on this thread, not on the ThreadPool, so disable FTZ
+    // here for the host-side reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+
+    Force64BitFPUPrecision();
+
+    // Build one kernel/program per vector size in [min, max)
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+                                       f->nameInCode, relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        double *p = (double *)gIn;
+        if (gWimpyMode)
+        {
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
+        }
+        else
+        {
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+                p[j] = DoubleFromUInt32((uint32_t)i + j);
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            goto exit; // also restores FP state and releases kernels
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+            if ((error =
+                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
+                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
+                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
+                                        &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
+                                        &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+
+            if ((error =
+                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
+                                            &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                goto exit;
+            }
+        }
+
+        // Flush so the device can start while the host computes the reference
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        double *s = (double *)gIn;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            r[j] = f->dfunc.i_f(s[j]);
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                goto exit;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if (t[j] != q[j])
+                {
+                    if (ftz && IsDoubleSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->dfunc.i_f(0.0);
+                        unsigned int correct1 = f->dfunc.i_f(-0.0);
+                        if (q[j] == correct0 || q[j] == correct1) continue;
+                    }
+
+                    uint32_t err = t[j] - q[j];
+                    if (q[j] > t[j]) err = q[j] - t[j];
+                    vlog_error(
+                        "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
+                        f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
+                        q[j]);
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14u step:%10zu bufferSize:%10zu \n", (uint32_t)i,
+                     (size_t)step, (size_t)BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+
+    vlog("\n");
+
+exit:
+    RestoreFPState(&oldMode);
+    // Release the per-vector-size kernels and programs
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
new file mode 100644
index 0000000000..89b566d99c
--- /dev/null
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -0,0 +1,302 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+                bool relaxedMode)
+{
+    const char *c[] = { "__kernel void math_kernel",
+                        sizeNames[vectorSize],
+                        "( __global int",
+                        sizeNames[vectorSize],
+                        "* out, __global float",
+                        sizeNames[vectorSize],
+                        "* in)\n"
+                        "{\n"
+                        " size_t i = get_global_id(0);\n"
+                        " out[i] = ",
+                        name,
+                        "( in[i] );\n"
+                        "}\n" };
+
+    const char *c3[] = {
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global int* out, __global float* in)\n"
+        "{\n"
+        " size_t i = get_global_id(0);\n"
+        " if( i + 1 < get_global_size(0) )\n"
+        " {\n"
+        " float3 f0 = vload3( 0, in + 3 * i );\n"
+        " int3 i0 = ",
+        name,
+        "( f0 );\n"
+        " vstore3( i0, 0, out + 3*i );\n"
+        " }\n"
+        " else\n"
+        " {\n"
+        " size_t parity = i & 1; // Figure out how many elements are "
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
+        "buffer size \n"
+        " float3 f0;\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 1:\n"
+        " f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        " break;\n"
+        " case 0:\n"
+        " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        " break;\n"
+        " }\n"
+        " int3 i0 = ",
+        name,
+        "( f0 );\n"
+        " switch( parity )\n"
+        " {\n"
+        " case 0:\n"
+        " out[3*i+1] = i0.y; \n"
+        " // fall through\n"
+        " case 1:\n"
+        " out[3*i] = i0.x; \n"
+        " break;\n"
+        " }\n"
+        " }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c) / sizeof(c[0]);
+
+    if (sizeValues[vectorSize] == 3)
+    {
+        kern = c3;
+        kernSize = sizeof(c3) / sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
+             sizeNames[vectorSize]);
+
+    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+}
+
+struct BuildKernelInfo
+{
+    cl_uint offset; // the first vector size to build
+    cl_kernel *kernels;
+    cl_program *programs;
+    const char *nameInCode;
+    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
+};
+
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    BuildKernelInfo *info = (BuildKernelInfo *)p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel(info->nameInCode, i, info->kernels + i,
+                       info->programs + i, info->relaxedMode);
+}
+
+} // anonymous namespace
+
+int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    cl_program programs[VECTOR_SIZE_COUNT];
+    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
+    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
+
+    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+    // The test loop runs on this thread, not on the ThreadPool, so disable FTZ
+    // here for the host-side reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+
+    Force64BitFPUPrecision();
+
+    // Build one kernel/program per vector size in [min, max)
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+                                       f->nameInCode, relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        if (gWimpyMode)
+        {
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+                p[j] = (cl_uint)i + j * scale;
+        }
+        else
+        {
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+                p[j] = (uint32_t)i + j;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            goto exit; // also restores FP state and releases kernels
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+            if ((error =
+                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
+                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                           error, j);
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
+                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
+                                        &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
+                                        &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                goto exit;
+            }
+
+            if ((error =
+                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
+                                            &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                goto exit;
+            }
+        }
+
+        // Flush so the device can start while the host computes the reference
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        float *s = (float *)gIn;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            r[j] = f->func.i_f(s[j]);
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                goto exit;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if (t[j] != q[j])
+                {
+                    if (ftz && IsFloatSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->func.i_f(0.0);
+                        unsigned int correct1 = f->func.i_f(-0.0);
+                        if (q[j] == correct0 || q[j] == correct1) continue;
+                    }
+
+                    uint32_t err = t[j] - q[j];
+                    if (q[j] > t[j]) err = q[j] - t[j];
+                    vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
+                               "*%d vs. %d\n",
+                               f->name, sizeNames[k], err, ((float *)gIn)[j],
+                               ((cl_uint *)gIn)[j], t[j], q[j]);
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14u step:%10zu bufferSize:%10zu \n", (uint32_t)i,
+                     (size_t)step, (size_t)BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+
+    vlog("\n");
+
+exit:
+    RestoreFPState(&oldMode);
+    // Release the per-vector-size kernels and programs
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
deleted file mode 100644
index b590f50a85..0000000000
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ /dev/null
@@ -1,1252 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestMacro_Int_Float_Float(const Func *f, MTdata, bool relaxedMode); -int TestMacro_Int_Double_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _macro_binary = { "macro_binary", TestMacro_Int_Float_Float, - TestMacro_Int_Double_Double }; - -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in, __global float* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " int3 i0 = ", name, "( f0, f1 );\n" - " vstore3( i0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " float3 f0, f1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " int3 i0 = ", name, "( f0, f1 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - - -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in, __global double* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 f0 = vload3( 0, in + 3 * i );\n" - " double3 f1 = vload3( 0, in2 + 3 * i );\n" - " long3 l0 = ", name, "( f0, f1 );\n" - " vstore3( l0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure 
out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 f0, f1;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (double3)( in[3*i], NAN, NAN ); \n" - " f1 = (double3)( in2[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " long3 l0 = ", name, "( f0, f1 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = l0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = l0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - - -// A table of more difficult cases to get right -static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), 
MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, 
+0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f -}; - -static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. 
- cl_uint scale; // stride between individual test values - int ftz; // non-zero if running in flush to zero mode - -}TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( 
test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed.
(%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - - // Run the kernels - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i 
< PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - vlog( "\n" ); - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - fptr func = job->f->func; - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - 
const char *name = job->f->name; - cl_int *t,*r; - cl_float *s,*s2; - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_int *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; - cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - if( job_id <= (cl_uint)indx ) - { // test edge cases - float *fp = (float *)p; - float *fp2 = (float *)p2; - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; - - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesFloat[x]; - fp2[j] = specialValuesFloat[y]; - if( ++x >= specialValuesFloatCount ) - { - x = 0; - y++; - if( y >= specialValuesFloatCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - } - - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! 
err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (cl_int *)gOut_Ref + thread_id * buffer_elements; - s = (float *)gIn + thread_id * buffer_elements; - s2 = (float *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = func.i_ff( 
s[j], s2[j] ); - - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_int *)r; - for( j = 0; j < buffer_elements; j++ ) - { - cl_int *q = out[0]; - - if( gMinVectorSizeIndex == 0 && t[j] != q[j] ) - { - if( ftz ) - { - if( IsFloatSubnormal( s[j]) ) - { - if( IsFloatSubnormal( s2[j] ) ) - { - int correct = func.i_ff( 0.0f, 0.0f ); - int correct2 = func.i_ff( 0.0f, -0.0f ); - int correct3 = func.i_ff( -0.0f, 0.0f ); - int correct4 = func.i_ff( -0.0f, -0.0f ); - - if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) - continue; - } - else - { - int correct = func.i_ff( 0.0f, s2[j] ); - int correct2 = func.i_ff( -0.0f, s2[j] ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - else if( IsFloatSubnormal( s2[j] ) ) - { - int correct = func.i_ff( s[j], 0.0f ); - int correct2 = func.i_ff( s[j], -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - - } - - uint32_t err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. 
0x%8.8x (index: %d)\n", name, err, ((float*) s)[j], ((float*) s2)[j], t[j], q[j], j ); - error = -1; - goto exit; - } - - for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) - { - q = out[k]; - // If we aren't getting the correctly rounded result - if( -t[j] != q[j] ) - { - if( ftz ) - { - if( IsFloatSubnormal( s[j]) ) - { - if( IsFloatSubnormal( s2[j] ) ) - { - int correct = -func.i_ff( 0.0f, 0.0f ); - int correct2 = -func.i_ff( 0.0f, -0.0f ); - int correct3 = -func.i_ff( -0.0f, 0.0f ); - int correct4 = -func.i_ff( -0.0f, -0.0f ); - - if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) - continue; - } - else - { - int correct = -func.i_ff( 0.0f, s2[j] ); - int correct2 = -func.i_ff( -0.0f, s2[j] ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - else if( IsFloatSubnormal( s2[j] ) ) - { - int correct = -func.i_ff( s[j], 0.0f ); - int correct2 = -func.i_ff( s[j], -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - - } - cl_uint err = -t[j] - q[j]; - if( q[j] > -t[j] ) - err = q[j] + t[j]; - vlog_error( "\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, sizeNames[k], err, ((float*) s)[j], ((float*) s2)[j], -t[j], q[j], j ); - error = -1; - goto exit; - } - } - } - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } - -exit: - return error; -} - - -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, 
-0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), 
MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, -}; - -static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); - - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * 
RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create 
sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input arrays - uint64_t *p = (uint64_t *)gIn; - uint64_t *p2 = (uint64_t *)gIn2; - for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) - { - p[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); - p2[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); - } - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, 
CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - vlog( "\n" ); - -exit: - // Release - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - 
clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - dptr dfunc = job->f->dfunc; - int ftz = job->ftz; - MTdata d = tinfo->d; - cl_uint j, k; - cl_int error; - const char *name = job->f->name; - cl_long *t,*r; - cl_double *s,*s2; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_long *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - //Init input array - double *p = (double *)gIn + thread_id * buffer_elements; - double *p2 = (double *)gIn2 + thread_id * buffer_elements; - j = 0; - int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; - int indx = (totalSpecialValueCount - 1) / buffer_elements; - - if( job_id <= (cl_uint)indx ) - { // test edge cases - uint32_t x, y; - - x = (job_id * buffer_elements) % specialValuesDoubleCount; - y = (job_id * buffer_elements) / specialValuesDoubleCount; - - for( ; j < buffer_elements; j++ ) - { - p[j] = specialValuesDouble[x]; - p2[j] = specialValuesDouble[y]; - if( ++x >= specialValuesDoubleCount ) - { - x = 0; - y++; - if( y >= specialValuesDoubleCount ) - break; - } - } - } - - //Init any remaining values. - for( ; j < buffer_elements; j++ ) - { - ((cl_ulong*)p)[j] = genrand_int64(d); - ((cl_ulong*)p2)[j] = genrand_int64(d); - } - - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - goto exit; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); - goto exit; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); - goto exit; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - r = (cl_long *)gOut_Ref + thread_id * buffer_elements; - s = (cl_double *)gIn + thread_id * buffer_elements; - s2 = (cl_double *)gIn2 + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - r[j] = dfunc.i_ff( s[j], s2[j] ); - - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
- for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - } - - // Wait for the last buffer - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - goto exit; - } - - //Verify data - t = (cl_long *)r; - for( j = 0; j < buffer_elements; j++ ) - { - cl_long *q = (cl_long *) out[0]; - - // If we aren't getting the correctly rounded result - if( gMinVectorSizeIndex == 0 && t[j] != q[j] ) - { - if( ftz ) - { - if( IsDoubleSubnormal( s[j]) ) - { - if( IsDoubleSubnormal( s2[j] ) ) - { - int64_t correct = dfunc.i_ff( 0.0f, 0.0f ); - int64_t correct2 = dfunc.i_ff( 0.0f, -0.0f ); - int64_t correct3 = dfunc.i_ff( -0.0f, 0.0f ); - int64_t correct4 = dfunc.i_ff( -0.0f, -0.0f ); - - if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) - continue; - } - else - { - int64_t correct = dfunc.i_ff( 0.0f, s2[j] ); - int64_t correct2 = dfunc.i_ff( -0.0f, s2[j] ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - else if( IsDoubleSubnormal( s2[j] ) ) - { - int64_t correct = dfunc.i_ff( s[j], 0.0f ); - int64_t correct2 = dfunc.i_ff( s[j], -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - - } - - uint64_t err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld vs. 
%lld (index: %d)\n", name, err, ((double*) s)[j], ((double*) s2)[j], t[j], q[j], j ); - error = -1; - goto exit; - } - - - for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) - { - q = (cl_long*) out[k]; - // If we aren't getting the correctly rounded result - if( -t[j] != q[j] ) - { - if( ftz ) - { - if( IsDoubleSubnormal( s[j]) ) - { - if( IsDoubleSubnormal( s2[j] ) ) - { - int64_t correct = -dfunc.i_ff( 0.0f, 0.0f ); - int64_t correct2 = -dfunc.i_ff( 0.0f, -0.0f ); - int64_t correct3 = -dfunc.i_ff( -0.0f, 0.0f ); - int64_t correct4 = -dfunc.i_ff( -0.0f, -0.0f ); - - if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) - continue; - } - else - { - int64_t correct = -dfunc.i_ff( 0.0f, s2[j] ); - int64_t correct2 = -dfunc.i_ff( -0.0f, s2[j] ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - else if( IsDoubleSubnormal( s2[j] ) ) - { - int64_t correct = -dfunc.i_ff( s[j], 0.0f ); - int64_t correct2 = -dfunc.i_ff( s[j], -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - - } - - uint64_t err = -t[j] - q[j]; - if( q[j] > -t[j] ) - err = q[j] + t[j]; - vlog_error( "\nERROR: %sD%s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld (index: %d)\n", name, sizeNames[k], err, ((double*) s)[j], ((double*) s2)[j], -t[j], q[j], j ); - error = -1; - goto exit; - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } - -exit: - return error; -} - diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp new file mode 100644 index 0000000000..d3e8071fb3 --- /dev/null +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -0,0 +1,719 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global long", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in1, __global double", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global long* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " double3 f1 = vload3( 0, in2 + 3 * i 
);\n" + " long3 l0 = ", + name, + "( f0, f1 );\n" + " vstore3( l0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " + "buffer size \n" + " double3 f0;\n" + " double3 f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " f1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " long3 l0 = ", + name, + "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = l0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = l0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. 
+ cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode +}; + +// A table of more difficult cases to get right +const double specialValues[] = { + -NAN, + -INFINITY, + -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), + -1000.0, + -100.0, + -4.0, + -3.5, + -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), + -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), + -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), + -1.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), + -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), + -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, 
-1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, + + +NAN, + +INFINITY, + +DBL_MAX, + MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), + +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), + +2.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), + +1.0, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1,
+0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, + MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), + +0.0, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_double); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + dptr dfunc = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_int error; + const char *name = job->f->name; + cl_long *t; + cl_long *r; + cl_double *s; 
+ cl_double *s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_long *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_long *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + double *p = (double *)gIn + thread_id * buffer_elements; + double *p2 = (double *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + p[idx] = specialValues[x]; + p2[idx] = specialValues[y]; + if (++x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + } + } + + // Init any remaining values. + for (; idx < buffer_elements; idx++) + { + ((cl_ulong *)p)[idx] = genrand_int64(d); + ((cl_ulong *)p2)[idx] = genrand_int64(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! 
err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + r = (cl_long *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for (size_t j = 0; j < 
buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_long *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_long *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + cl_long *q = out[0]; + + // If we aren't getting the correctly rounded result + if (gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + // If we aren't getting the correctly rounded result + if (ftz) + { + if (IsDoubleSubnormal(s[j])) + { + if (IsDoubleSubnormal(s2[j])) + { + int64_t correct = dfunc.i_ff(0.0f, 0.0f); + int64_t correct2 = dfunc.i_ff(0.0f, -0.0f); + int64_t correct3 = dfunc.i_ff(-0.0f, 0.0f); + int64_t correct4 = dfunc.i_ff(-0.0f, -0.0f); + + if (correct == q[j] || correct2 == q[j] + || correct3 == q[j] || correct4 == q[j]) + continue; + } + else + { + int64_t correct = dfunc.i_ff(0.0f, s2[j]); + int64_t correct2 = dfunc.i_ff(-0.0f, s2[j]); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + else if (IsDoubleSubnormal(s2[j])) + { + int64_t correct = dfunc.i_ff(s[j], 0.0f); + int64_t correct2 = dfunc.i_ff(s[j], -0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + cl_ulong err = t[j] - q[j]; + if (q[j] > t[j]) err = q[j] - t[j]; + vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " + "vs. 
%lld (index: %d)\n", + name, err, ((double *)s)[j], ((double *)s2)[j], t[j], + q[j], j); + error = -1; + goto exit; + } + + + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) + { + q = (cl_long *)out[k]; + // If we aren't getting the correctly rounded result + if (-t[j] != q[j]) + { + if (ftz) + { + if (IsDoubleSubnormal(s[j])) + { + if (IsDoubleSubnormal(s2[j])) + { + int64_t correct = -dfunc.i_ff(0.0f, 0.0f); + int64_t correct2 = -dfunc.i_ff(0.0f, -0.0f); + int64_t correct3 = -dfunc.i_ff(-0.0f, 0.0f); + int64_t correct4 = -dfunc.i_ff(-0.0f, -0.0f); + + if (correct == q[j] || correct2 == q[j] + || correct3 == q[j] || correct4 == q[j]) + continue; + } + else + { + int64_t correct = -dfunc.i_ff(0.0f, s2[j]); + int64_t correct2 = -dfunc.i_ff(-0.0f, s2[j]); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + else if (IsDoubleSubnormal(s2[j])) + { + int64_t correct = -dfunc.i_ff(s[j], 0.0f); + int64_t correct2 = -dfunc.i_ff(s[j], -0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + cl_ulong err = -t[j] - q[j]; + if (q[j] > -t[j]) err = q[j] + t[j]; + vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " + "%.13la}: *%lld vs. %lld (index: %d)\n", + name, sizeNames[k], err, ((double *)s)[j], + ((double *)s2)[j], -t[j], q[j], j); + error = -1; + goto exit; + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto
exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex;
j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp new file mode 100644 index 0000000000..6c7c8c05e7 --- /dev/null +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -0,0 +1,708 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global int", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in1, __global float", + sizeNames[vectorSize], + "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global int* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " int3 i0 = ", + name, + "( f0, f1 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " float3 f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = ", + name, + "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next.
+ cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode +}; + +// A table of more difficult cases to get right +const float specialValues[] = { + -NAN, + -INFINITY, + -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), + MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), + MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), + MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), + MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), + MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), + MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), + MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), + MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), + -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), + -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), + -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), + MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), + -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), + -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), + MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), + -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), + -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), + MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), + MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), + MAKE_HEX_FLOAT(-0x0.000004p-126f, 
-0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), + -0.0f, + + +NAN, + +INFINITY, + +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), + MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), + MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), + MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), + MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), + MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), + MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), + MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), + MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), + 2.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), + +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), + 1.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), + +1.0f, + MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), + +0.5f, + MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), + +0.25f, + MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), + +FLT_MIN, + MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), + MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), + MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), + MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), + +0.0f, +}; + +constexpr size_t specialValuesCount 
= + sizeof(specialValues) / sizeof(specialValues[0]); + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_float); + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + fptr func = job->f->func; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_int error; + const char *name = job->f->name; + cl_int *t = 0; + cl_int *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_int *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_int *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + cl_uint idx = 0; + + int totalSpecialValueCount = specialValuesCount * specialValuesCount; + int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)lastSpecialJobIndex) + { // test edge cases + float *fp = (float *)p; + float *fp2 = (float *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesCount; + y = (job_id * buffer_elements) / specialValuesCount; + + for (; idx < buffer_elements; idx++) + { + fp[idx] = specialValues[x]; + fp2[idx] = specialValues[y]; + ++x; + if (x >= specialValuesCount) + { + x = 0; + y++; + if (y >= specialValuesCount) break; + } + } + } + + // Init any remaining values. 
+ for (; idx < buffer_elements; idx++) + { + p[idx] = genrand_int32(d); + p2[idx] = genrand_int32(d); + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, + buffer_size, p2, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + goto exit; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! 
err: %d\n", error); + goto exit; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), + &tinfo->inBuf2))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + r = (cl_int *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_int *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + goto exit; + } + } + + // Verify data + t = (cl_int *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + cl_int *q = out[0]; + + if (gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + if (ftz) + { + if (IsFloatSubnormal(s[j])) + { + if (IsFloatSubnormal(s2[j])) + { + int correct = func.i_ff(0.0f, 0.0f); + int correct2 = func.i_ff(0.0f, -0.0f); + int correct3 = func.i_ff(-0.0f, 0.0f); + int correct4 = func.i_ff(-0.0f, -0.0f); + + if (correct == q[j] || correct2 == q[j] + || correct3 == q[j] || correct4 == q[j]) + continue; + } + else + { + int correct = func.i_ff(0.0f, s2[j]); + int correct2 = func.i_ff(-0.0f, s2[j]); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + else if (IsFloatSubnormal(s2[j])) + { + int correct = func.i_ff(s[j], 0.0f); + int correct2 = func.i_ff(s[j], -0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + uint32_t err = t[j] - q[j]; + if (q[j] > t[j]) err = q[j] - t[j]; + vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. 
" + "0x%8.8x (index: %d)\n", + name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j], + j); + error = -1; + goto exit; + } + + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if (-t[j] != q[j]) + { + if (ftz) + { + if (IsFloatSubnormal(s[j])) + { + if (IsFloatSubnormal(s2[j])) + { + int correct = -func.i_ff(0.0f, 0.0f); + int correct2 = -func.i_ff(0.0f, -0.0f); + int correct3 = -func.i_ff(-0.0f, 0.0f); + int correct4 = -func.i_ff(-0.0f, -0.0f); + + if (correct == q[j] || correct2 == q[j] + || correct3 == q[j] || correct4 == q[j]) + continue; + } + else + { + int correct = -func.i_ff(0.0f, s2[j]); + int correct2 = -func.i_ff(-0.0f, s2[j]); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + else if (IsFloatSubnormal(s2[j])) + { + int correct = -func.i_ff(s[j], 0.0f); + int correct2 = -func.i_ff(s[j], -0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + cl_uint err = -t[j] - q[j]; + if (q[j] > -t[j]) err = q[j] + t[j]; + vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x " + "vs. 0x%8.8x (index: %d)\n", + name, sizeNames[k], err, ((float *)s)[j], + ((float *)s2)[j], -t[j], q[j], j); + error = -1; + goto exit; + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + +exit: + return error; +} + +} // anonymous namespace + +int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", +
region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j =
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp deleted file mode 100644 index 872007f156..0000000000 --- a/test_conformance/math_brute_force/macro_unary.cpp +++ /dev/null @@ -1,1007 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode); -int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _macro_unary = { "macro_unary", TestMacro_Int_Float, - TestMacro_Int_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = ", name, "( f0 );\n" - " vstore3( i0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " int3 i0;\n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " i0 = ", name, "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " long3 l0 = ", name, "( d0 );\n" - " vstore3( l0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " double3 d0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " long3 l0 = ", name, "( d0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = l0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = l0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. 
- cl_uint scale; // stride between individual test values - int ftz; // non-zero if running in flush to zero mode - -}TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode ) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( 
test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed.
(%d)\n", error ); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - cl_uint *p = (cl_uint *)gIn; - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, 
startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - vlog( "\n" ); - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint scale = job->scale; - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - fptr func = job->f->func; - int ftz = job->ftz; - cl_uint j, k; - cl_int error = CL_SUCCESS; - cl_int ret = CL_SUCCESS; - const char *name = job->f->name; - - int signbit_test = 0; - if(!strcmp(name, "signbit")) - signbit_test = 1; - - #define ref_func(s) ( signbit_test ? 
func.i_f_f( s ) : func.i_f( s ) ) - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_int *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - // Write the new values to the input array - cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - p[j] = base + j * scale; - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - return error; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - return error; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - return error; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! 
err: %d\n", error ); - return error; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - return error; - } - } - - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements; - float *s = (float *)p; - for( j = 0; j < buffer_elements; j++ ) - r[j] = ref_func( s[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - // Wait for the last buffer - out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - - //Verify data - cl_int *t = (cl_int *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_int *q = out[0]; - - // If we aren't getting the correctly rounded result - if( gMinVectorSizeIndex == 0 && t[j] != q[j]) - { - // If we aren't getting the correctly rounded result - if( ftz ) - { - if( IsFloatSubnormal( s[j]) ) - { - int correct = ref_func( +0.0f ); - int correct2 = ref_func( -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - - uint32_t err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name, err, ((float*) s)[j], t[j], q[j] ); - error = -1; - goto exit; - } - - - for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) - { - q = out[k]; - // If we aren't getting the correctly rounded result - if( -t[j] != q[j] ) - { - if( ftz ) - { - if( IsFloatSubnormal( s[j])) - { - int correct = -ref_func( +0.0f ); - int correct2 = -ref_func( -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - - uint32_t err = -t[j] - q[j]; - if( q[j] > -t[j] ) - err = q[j] + t[j]; - vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float*) s)[j], -t[j], q[j] ); - error = -1; - goto exit; - } - } - } - } - -exit: - ret = error; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - { - vlog( "clFlush 3 failed\n" ); - return error; - } - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - - return ret; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ); - -int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode ) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - 
test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error); - /* Qualcomm fix: end */ - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed.
(%d)\n", error ); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - cl_ulong *p = (cl_ulong *)gIn; - for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32(genrand_int32(d)); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - 
sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - vlog( "\n" ); - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint scale = job->scale; - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - dptr dfunc = job->f->dfunc; - cl_uint j, k; - cl_int error; - int ftz = job->ftz; - const char *name = job->f->name; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_long *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - // Write the new values to the input array - cl_double *p = (cl_double*) gIn + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - p[j] = DoubleFromUInt32( base + j * scale); - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - return error; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - return error; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - return error; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! 
err: %d\n", error ); - return error; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - return error; - } - } - - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements; - cl_double *s = (cl_double *)p; - for( j = 0; j < buffer_elements; j++ ) - r[j] = dfunc.i_f( s[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - // Wait for the last buffer - out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - - - //Verify data - cl_long *t = (cl_long *)r; - for( j = 0; j < buffer_elements; j++ ) - { - cl_long *q = out[0]; - - - // If we aren't getting the correctly rounded result - if( gMinVectorSizeIndex == 0 && t[j] != q[j]) - { - // If we aren't getting the correctly rounded result - if( ftz ) - { - if( IsDoubleSubnormal( s[j]) ) - { - cl_long correct = dfunc.i_f( +0.0f ); - cl_long correct2 = dfunc.i_f( -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - - cl_ulong err = t[j] - q[j]; - if( q[j] > t[j] ) - err = q[j] - t[j]; - vlog_error( "\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", name, err, ((double*) gIn)[j], t[j], q[j] ); - return -1; - } - - - for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) - { - q = out[k]; - // If we aren't getting the correctly rounded result - if( -t[j] != q[j] ) - { - if( ftz ) - { - if( IsDoubleSubnormal( s[j])) - { - int64_t correct = -dfunc.i_f( +0.0f ); - int64_t correct2 = -dfunc.i_f( -0.0f ); - if( correct == q[j] || correct2 == q[j] ) - continue; - } - } - - cl_ulong err = -t[j] - q[j]; - if( q[j] > -t[j] ) - err = q[j] + t[j]; - vlog_error( "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", name, sizeNames[k], err, ((double*) gIn)[j], -t[j], q[j] ); - return -1; - } - } - - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); - } else - { - vlog("." 
); - } - fflush(stdout); - } - - return CL_SUCCESS; -} - - - - diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp new file mode 100644 index 0000000000..7f3521c6f2 --- /dev/null +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -0,0 +1,490 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global long", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global long* out, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " long3 l0 = ", + name, + "( d0 );\n" + " vstore3( l0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i 
& 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " + "buffer size \n" + " double3 d0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " long3 l0 = ", + name, + "( d0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = l0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = l0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next.
+ cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode +}; + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_double); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + dptr dfunc = job->f->dfunc; + int ftz = job->ftz; + cl_int error; + const char *name = job->f->name; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_long *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_long *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Write the new values to the input array + cl_double *p = (cl_double *)gIn + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + p[j] = DoubleFromUInt32(base + j * scale); + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + return error; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! 
err: %d\n", error); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + return error; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements; + cl_double *s = (cl_double *)p; + for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_long *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Verify data + cl_long *t = (cl_long *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + cl_long *q = out[0]; + + // If we aren't getting the correctly rounded result + if (gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + // If we aren't getting the correctly rounded result + if (ftz) + { + if (IsDoubleSubnormal(s[j])) + { + cl_long correct = dfunc.i_f(+0.0f); + cl_long correct2 = dfunc.i_f(-0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + cl_ulong err = t[j] - q[j]; + if (q[j] > t[j]) err = q[j] - t[j]; + vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", + name, err, ((double *)gIn)[j], t[j], q[j]); + return -1; + } + + + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if (-t[j] != q[j]) + { + if (ftz) + { + if (IsDoubleSubnormal(s[j])) + { + int64_t correct = -dfunc.i_f(+0.0f); + int64_t correct2 = -dfunc.i_f(-0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + cl_ulong err = -t[j] - q[j]; + if (q[j] > -t[j]) err = q[j] + t[j]; + vlog_error( + "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", + name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]); + return -1; + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + + return CL_SUCCESS; +} + +} // anonymous namespace + +int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } +
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp new file mode 100644 index 0000000000..0cd54de466 --- /dev/null +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -0,0 +1,504 @@ +// +// Copyright (c) 2017 The Khronos Group Inc.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global int", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global int* out, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = ", + name, + "( f0 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " int3 i0;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " i0 = ", + name, + "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode +}; + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_float); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + fptr func = job->f->func; + int ftz = job->ftz; + cl_int error = CL_SUCCESS; + cl_int ret = CL_SUCCESS; + const char *name = job->f->name; + + int signbit_test = 0; + if (!strcmp(name, "signbit")) signbit_test = 1; + +#define ref_func(s) (signbit_test ?
func.i_f_f(s) : func.i_f(s)) + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_int *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_int *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale; + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + return error; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! 
err: %d\n", error); + return error; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements; + float *s = (float *)p; + for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_int *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Verify data + cl_int *t = (cl_int *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_int *q = out[0]; + + // If we aren't getting the correctly rounded result + if (gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + // If we aren't getting the correctly rounded result + if (ftz) + { + if (IsFloatSubnormal(s[j])) + { + int correct = ref_func(+0.0f); + int correct2 = ref_func(-0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + uint32_t err = t[j] - q[j]; + if (q[j] > t[j]) err = q[j] - t[j]; + vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", + name, err, ((float *)s)[j], t[j], q[j]); + error = -1; + goto exit; + } + + + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if (-t[j] != q[j]) + { + if (ftz) + { + if (IsFloatSubnormal(s[j])) + { + int correct = -ref_func(+0.0f); + int correct2 = -ref_func(-0.0f); + if (correct == q[j] || correct2 == q[j]) continue; + } + } + + uint32_t err = -t[j] - q[j]; + if (q[j] > -t[j]) err = q[j] + t[j]; + vlog_error( + "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, + sizeNames[k], err, ((float *)s)[j], -t[j], q[j]); + error = -1; + goto exit; + } + } + } + } + +exit: + ret = error; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) + { + vlog("clFlush 3 failed\n"); + return error; + } + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + + return ret; +} + +} // anonymous namespace + +int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd,
%zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp deleted file mode 100644 index 0737afbc20..0000000000 --- a/test_conformance/math_brute_force/mad.cpp +++ /dev/null @@ -1,1137 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc.
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_mad(const Func *f, MTdata, bool relaxedMode); -int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _mad_tbl = { "ternary", TestFunc_mad, TestFunc_mad_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" - "}\n" - }; - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2, __global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", name, "( f0, f1, f2 );\n" - " 
vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0, f1, f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2, __global double", sizeNames[vectorSize], "* in3 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" - "}\n" - }; - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2, __global double* in3)\n" - "{\n" - " 
size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", name, "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0, d1, d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", name, "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; -// int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal = 0.0f; - float maxErrorVal2 = 0.0f; - float maxErrorVal3 = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( float ); - - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - p3[j] = genrand_int32(d); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - 
size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - float *r = (float *)gOut_Ref; - float *s = (float *)gIn; - float *s2 = (float *)gIn2; - float *s3 = (float *)gIn3; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = (float) f->func.f_fff( s[j], s2[j], s3[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data -- Commented out on purpose. no verification possible. MAD is a random number generator. 
-/* - uint32_t *t = gOut_Ref; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = gOut[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = f->func.f_fff( s[j], s2[j], s3[j] ); - float err = Ulp_Error( test, correct ); - int fail = ! (fabsf(err) <= f->float_ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatSubnormal(correct) ) - { // look at me, - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( fail && IsFloatSubnormal( s[j] ) ) - { // look at me, - double correct2 = f->func.f_fff( 0.0, s2[j], s3[j] ); - double correct3 = f->func.f_fff( -0.0, s2[j], s3[j] ); - float err2 = Ulp_Error( test, correct2 ); - float err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) - { // look at me now, - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - - //try with first two args as zero - if( IsFloatSubnormal( s2[j] ) ) - { // its fun to have fun, - correct2 = f->func.f_fff( 0.0, 0.0, s3[j] ); - correct3 = f->func.f_fff( -0.0, 0.0, s3[j] ); - double correct4 = f->func.f_fff( 0.0, -0.0, s3[j] ); - double correct5 = f->func.f_fff( -0.0, -0.0, s3[j] ); - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - float err4 = Ulp_Error( test, correct4 ); - float err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && - (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || - IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - if( IsFloatSubnormal( s3[j] ) ) - { // but you have to know how! 
- correct2 = f->func.f_fff( 0.0, 0.0, 0.0f ); - correct3 = f->func.f_fff( -0.0, 0.0, 0.0f ); - correct4 = f->func.f_fff( 0.0, -0.0, 0.0f ); - correct5 = f->func.f_fff( -0.0, -0.0, 0.0f ); - double correct6 = f->func.f_fff( 0.0, 0.0, -0.0f ); - double correct7 = f->func.f_fff( -0.0, 0.0, -0.0f ); - double correct8 = f->func.f_fff( 0.0, -0.0, -0.0f ); - double correct9 = f->func.f_fff( -0.0, -0.0, -0.0f ); - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - float err6 = Ulp_Error( test, correct6 ); - float err7 = Ulp_Error( test, correct7 ); - float err8 = Ulp_Error( test, correct8 ); - float err9 = Ulp_Error( test, correct9 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && - (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)) && - (!(fabsf(err5) <= f->float_ulps)) && (!(fabsf(err6) <= f->float_ulps)) && - (!(fabsf(err7) <= f->float_ulps)) && (!(fabsf(err8) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - if( fabsf( err6 ) < fabsf(err ) ) - err = err6; - if( fabsf( err7 ) < fabsf(err ) ) - err = err7; - if( fabsf( err8 ) < fabsf(err ) ) - err = err8; - if( fabsf( err9 ) < fabsf(err ) ) - err = err9; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || - IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) || - IsFloatResultSubnormal( correct6, f->float_ulps ) || IsFloatResultSubnormal(correct7, f->float_ulps ) || - IsFloatResultSubnormal(correct8, f->float_ulps ) || IsFloatResultSubnormal( correct9, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( IsFloatSubnormal( s3[j] ) ) - { - correct2 = f->func.f_fff( 0.0, s2[j], 0.0 ); - correct3 = f->func.f_fff( -0.0, s2[j], 0.0 ); - double correct4 = f->func.f_fff( 0.0, s2[j], -0.0 ); - double correct5 = f->func.f_fff( -0.0, s2[j], -0.0 ); - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - float err4 = Ulp_Error( test, correct4 ); - float err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && - (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || - IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if( fail && IsFloatSubnormal( s2[j] ) ) - { - double correct2 = f->func.f_fff( s[j], 0.0, s3[j] ); - double correct3 = f->func.f_fff( s[j], -0.0, s3[j] ); - float err2 = Ulp_Error( test, correct2 ); - float err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - - //try with second two args as zero - if( IsFloatSubnormal( s3[j] ) ) - { - correct2 = f->func.f_fff( s[j], 0.0, 0.0 ); - correct3 = f->func.f_fff( s[j], -0.0, 0.0 ); - double correct4 = f->func.f_fff( s[j], 0.0, -0.0 ); - double correct5 = f->func.f_fff( s[j], -0.0, -0.0 ); - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - float err4 = Ulp_Error( test, correct4 ); - float err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && - (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || - IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if( fail && IsFloatSubnormal(s3[j]) ) - { - double correct2 = f->func.f_fff( s[j], s2[j], 0.0 ); - double correct3 = f->func.f_fff( s[j], s2[j], -0.0 ); - float err2 = Ulp_Error( test, correct2 ); - float err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - maxErrorVal2 = s2[j]; - maxErrorVal3 = s3[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((float*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } -*/ - if( 0 == (i & 0x0fffffff) ) - { - vlog("." ); - fflush(stdout); - } - } - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "pass" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - p3[j] = genrand_int32(d); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( 
error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; -// int ftz = f->ftz || gForceFTZ; - double maxErrorVal = 0.0f; - double maxErrorVal2 = 0.0f; - double maxErrorVal3 = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - uint64_t step = bufferSize / sizeof( double ); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - double *p3 = (double *)gIn3; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - p3[j] = DoubleFromUInt32(genrand_int32(d)); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", 
error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *r = (double *)gOut_Ref; - double *s = (double *)gIn; - double *s2 = (double *)gIn2; - double *s3 = (double *)gIn3; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data -- Commented out on purpose. no verification possible. MAD is a random number generator. 
-/* - uint64_t *t = gOut_Ref; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = gOut[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - double test = ((double*) q)[j]; - long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= f->double_ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, f->double_ulps) ) - { // look at me, - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( fail && IsDoubleSubnormal( s[j] ) ) - { // look at me, - long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); - long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { // look at me now, - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - - //try with first two args as zero - if( IsDoubleSubnormal( s2[j] ) ) - { // its fun to have fun, - correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] ); - correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); - long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); - long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - if( IsDoubleSubnormal( s3[j] ) ) - { // but you have to know how! 
- correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f ); - correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f ); - correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f ); - correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f ); - long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f ); - long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); - long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); - long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - float err6 = Bruteforce_Ulp_Error_Double( test, correct6 ); - float err7 = Bruteforce_Ulp_Error_Double( test, correct7 ); - float err8 = Bruteforce_Ulp_Error_Double( test, correct8 ); - float err9 = Bruteforce_Ulp_Error_Double( test, correct9 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && - (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && - (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - if( fabsf( err6 ) < fabsf(err ) ) - err = err6; - if( fabsf( err7 ) < fabsf(err ) ) - err = err7; - if( fabsf( err8 ) < fabsf(err ) ) - err = err8; - if( fabsf( err9 ) < fabsf(err ) ) - err = err9; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) || - IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, 
f->double_ulps ) || - IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if( IsDoubleSubnormal( s3[j] ) ) - { - correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 ); - correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); - long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); - long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( fail && IsDoubleSubnormal( s2[j] ) ) - { - long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); - long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - //try with second two args as zero - if( IsDoubleSubnormal( s3[j] ) ) - { - correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 ); - correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); - long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); - long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( fail && IsDoubleSubnormal(s3[j]) ) - { - long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); - long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - maxErrorVal2 = s2[j]; - maxErrorVal3 = s3[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %sD%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } -*/ - if( 0 == (i & 0x0fffffff) ) - { - vlog("." ); - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "pass" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - double *p3 = (double *)gIn3; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - p3[j] = DoubleFromUInt32(genrand_int32(d)); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, 
kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - - - diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp new file mode 100644 index 0000000000..8e88f9f624 --- /dev/null +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -0,0 +1,305 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in1, __global double", + sizeNames[vectorSize], + "* in2, __global double", + sizeNames[vectorSize], + "* in3 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i], in3[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* in, __global double* in2, " + "__global double* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " double3 d2 = vload3( 0, in3 + 3 * i );\n" + " d0 = ", + name, + "( d0, d1, d2 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " double3 d0;\n" + " double3 d1;\n" + " double3 d2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " d2 = (double3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", + name, + "( d0, d1, d2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +} // anonymous namespace + +int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + double maxErrorVal3 = 0.0f; + uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE); + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + p3[j] = DoubleFromUInt32(genrand_int32(d)); + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, + BUFFER_SIZE, gIn2, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, + BUFFER_SIZE, gIn3, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j 
= gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2), + &gInBuffer2))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3), + &gInBuffer3))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *s = (double *)gIn; + double *s2 = (double *)gIn2; + double *s3 = (double *)gIn3; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } 
+ + if (gSkipCorrectnessTesting) break; + + // Verify data -- No verification possible. + // MAD is a random number generator. + if (0 == (i & 0x0fffffff)) + { + vlog("."); + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, + maxErrorVal3); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp new file mode 100644 index 0000000000..0552ba4b96 --- /dev/null +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -0,0 +1,304 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in1, __global float", + sizeNames[vectorSize], + "* in2, __global float", + sizeNames[vectorSize], + "* in3 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i], in3[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* in, __global float* in2, " + "__global float* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " float3 f2 = vload3( 0, in3 + 3 * i );\n" + " f0 = ", + name, + "( f0, f1, f2 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " float3 f1;\n" + " float3 f2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " f2 = (float3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, f1, f2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +} // anonymous namespace + +int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + float maxErrorVal3 = 0.0f; + uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + p3[j] = genrand_int32(d); + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, + BUFFER_SIZE, gIn2, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, + BUFFER_SIZE, gIn3, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) 
+ { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeof(cl_float) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2), + &gInBuffer2))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3), + &gInBuffer3))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *s = (float *)gIn; + float *s2 = (float *)gIn2; + float *s3 = (float *)gIn3; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data -- No 
verification possible. + // MAD is a random number generator. + if (0 == (i & 0x0fffffff)) + { + vlog("."); + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, + maxErrorVal3); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 8f2e0a0c2f..1a6e0c4e1c 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,129 +13,126 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "Utility.h" +#include "function_list.h" +#include "sleep.h" +#include "utility.h" + +#include #include #include +#include #include -#include -#include "FunctionList.h" -#include "Sleep.h" +#include #include "harness/errorHelpers.h" #include "harness/kernelHelpers.h" #include "harness/parseParameters.h" - -#if defined( __APPLE__ ) - #include - #include - #include - #include -#elif defined( __linux__ ) - #include - #include - #include - #include +#include "harness/typeWrappers.h" + +#if defined(__APPLE__) +#include +#include +#include +#include +#elif defined(__linux__) +#include +#include +#include +#include #endif -#if defined (__linux__) || (defined WIN32 && defined __MINGW32__) +#if defined(__linux__) || (defined WIN32 && defined __MINGW32__) #include #endif #include "harness/testHarness.h" -#define kPageSize 4096 -#define DOUBLE_REQUIRED_FEATURES ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM ) +#define kPageSize 4096 +#define DOUBLE_REQUIRED_FEATURES \ + (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \ + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) -const char **gTestNames = NULL; -unsigned int gTestNameCount = 0; -char appName[ MAXPATHLEN ] = ""; -cl_device_id gDevice = NULL; -cl_context gContext = NULL; +static std::vector gTestNames; +static char appName[MAXPATHLEN] = ""; +cl_device_id gDevice = NULL; +cl_context gContext = NULL; cl_command_queue gQueue = NULL; -static int32_t gStartTestNumber; -static int32_t gEndTestNumber; -int gSkipCorrectnessTesting = 0; -int gStopOnError = 0; -static bool gSkipRestOfTests; -#if defined( __APPLE__ ) -int gMeasureTimes = 1; -#else -int gMeasureTimes = 0; -#endif -int gReportAverageTimes = 0; -int gForceFTZ = 0; -int gWimpyMode = 0; -int gHasDouble = 0; -int gTestFloat = 1; +static int32_t gStartTestNumber = -1; +static int32_t gEndTestNumber = -1; +int gSkipCorrectnessTesting = 0; +static int gStopOnError = 0; +static bool 
gSkipRestOfTests; +int gForceFTZ = 0; +int gWimpyMode = 0; +static int gHasDouble = 0; +static int gTestFloat = 1; // This flag should be 'ON' by default and it can be changed through the command // line arguments. static int gTestFastRelaxed = 1; -/*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions. - The spec does not specify ULP for derived function. The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled, - Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec. - Since there is no way of quering the device whether it is a derived or non-derived implementation according to OpenCL 2.0 spec then it has to be changed through a command line argument. +/*This flag corresponds to defining if the implementation has Derived Fast + Relaxed functions. The spec does not specify ULP for derived function. The + derived functions are composed of base functions which are tested for ULP, + thus when this flag is enabled, Derived functions will not be tested for ULP, + as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the + device whether it is a derived or non-derived implementation according to + OpenCL 2.0 spec then it has to be changed through a command line argument. 
*/ -int gFastRelaxedDerived = 1; -int gToggleCorrectlyRoundedDivideSqrt = 0; -int gDeviceILogb0 = 1; -int gDeviceILogbNaN = 1; -int gCheckTininessBeforeRounding = 1; -int gIsInRTZMode = 0; -uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT; -uint32_t gMinVectorSizeIndex = 0; -const char *method[] = { "Best", "Average" }; -void *gIn = NULL; -void *gIn2 = NULL; -void *gIn3 = NULL; -void *gOut_Ref = NULL; -void *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL }; -void *gOut_Ref2 = NULL; -void *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL }; -cl_mem gInBuffer = NULL; -cl_mem gInBuffer2 = NULL; -cl_mem gInBuffer3 = NULL; -cl_mem gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL }; -cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL }; -uint32_t gComputeDevices = 0; -uint32_t gSimdSize = 1; -uint32_t gDeviceFrequency = 0; -static MTdata gMTdata; +int gFastRelaxedDerived = 1; +static int gToggleCorrectlyRoundedDivideSqrt = 0; +int gDeviceILogb0 = 1; +int gDeviceILogbNaN = 1; +int gCheckTininessBeforeRounding = 1; +int gIsInRTZMode = 0; +uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT; +uint32_t gMinVectorSizeIndex = 0; +void *gIn = NULL; +void *gIn2 = NULL; +void *gIn3 = NULL; +void *gOut_Ref = NULL; +void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; +void *gOut_Ref2 = NULL; +void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; +cl_mem gInBuffer = NULL; +cl_mem gInBuffer2 = NULL; +cl_mem gInBuffer3 = NULL; +cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; +cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; +static MTdata gMTdata; cl_device_fp_config gFloatCapabilities = 0; -cl_device_fp_config gDoubleCapabilities = 0; -int gWimpyReductionFactor = 32; -int gWimpyBufferSize = BUFFER_SIZE; -int gVerboseBruteForce = 0; - -static int ParseArgs( int argc, const char **argv ); -static void PrintUsage( void 
); -static void PrintFunctions( void ); -test_status InitCL( cl_device_id device ); -static void ReleaseCL( void ); -static int InitILogbConstants( void ); -static int IsTininessDetectedBeforeRounding( void ); -static int IsInRTZMode( void ); //expensive. Please check gIsInRTZMode global instead. - - -int doTest( const char* name ) -{ - if( gSkipRestOfTests ) +int gWimpyReductionFactor = 32; +int gVerboseBruteForce = 0; + +static int ParseArgs(int argc, const char **argv); +static void PrintUsage(void); +static void PrintFunctions(void); +static test_status InitCL(cl_device_id device); +static void ReleaseCL(void); +static int InitILogbConstants(void); +static int IsTininessDetectedBeforeRounding(void); +static int +IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead. + +static int doTest(const char *name) +{ + if (gSkipRestOfTests) { - vlog( "Skipping function because of an earlier error.\n" ); + vlog("Skipping function because of an earlier error.\n"); return 1; } int error = 0; - const Func* func_data = NULL; + const Func *func_data = NULL; - for( size_t i = 0; i < functionListCount; i++ ) + for (size_t i = 0; i < functionListCount; i++) { - const Func* const temp_func = functionList + i; - if( strcmp( temp_func->name, name ) == 0 ) + const Func *const temp_func = functionList + i; + if (strcmp(temp_func->name, name) == 0) { - if( i < gStartTestNumber || i > gEndTestNumber ) + if ((gStartTestNumber != -1 && i < gStartTestNumber) + || i > gEndTestNumber) { - vlog( "Skipping function #%d\n", i ); + vlog("Skipping function #%d\n", i); return 0; } @@ -144,32 +141,34 @@ int doTest( const char* name ) } } - if( func_data == NULL ) + if (func_data == NULL) { - vlog( "Function '%s' doesn't exist!\n", name ); - exit( EXIT_FAILURE ); + vlog("Function '%s' doesn't exist!\n", name); + exit(EXIT_FAILURE); } - if( func_data->func.p == NULL ) + if (func_data->func.p == NULL) { - vlog( "'%s' is missing implementation, skipping function.\n", func_data->name 
); + vlog("'%s' is missing implementation, skipping function.\n", + func_data->name); return 0; } // if correctly rounded divide & sqrt are supported by the implementation // then test it; otherwise skip the test - if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 ) + if (strcmp(func_data->name, "sqrt_cr") == 0 + || strcmp(func_data->name, "divide_cr") == 0) { - if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 ) + if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0) { - vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" ); + vlog("Correctly rounded divide and sqrt are not supported, " + "skipping function.\n"); return 0; } } { - extern int my_ilogb(double); - if( 0 == strcmp( "ilogb", func_data->name ) ) + if (0 == strcmp("ilogb", func_data->name)) { InitILogbConstants(); } @@ -200,17 +199,17 @@ int doTest( const char* name ) } } - if( gTestFloat ) + if (gTestFloat) { gTestCount++; - vlog( "%3d: ", gTestCount ); + vlog("%3d: ", gTestCount); // Don't test with relaxed requirements. if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata, false /* relaxed mode */)) { gFailCount++; error++; - if( gStopOnError ) + if (gStopOnError) { gSkipRestOfTests = true; return error; @@ -218,17 +217,18 @@ int doTest( const char* name ) } } - if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p ) + if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc + && NULL != func_data->dfunc.p) { gTestCount++; - vlog( "%3d: ", gTestCount ); + vlog("%3d: ", gTestCount); // Don't test with relaxed requirements. 
if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata, false /* relaxed mode*/)) { gFailCount++; error++; - if( gStopOnError ) + if (gStopOnError) { gSkipRestOfTests = true; return error; @@ -240,515 +240,64 @@ int doTest( const char* name ) return error; } -int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "acos" ); -} -int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "acosh" ); -} -int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "acospi" ); -} -int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "asin" ); -} -int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "asinh" ); -} -int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "asinpi" ); -} -int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "atan" ); -} -int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "atanh" ); -} -int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "atanpi" ); -} -int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "atan2" ); -} -int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "atan2pi" ); -} -int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "cbrt" ); -} -int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements ) -{ - return doTest( "ceil" ); -} -int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "copysign" ); -} -int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "cos" ); -} -int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "cosh" ); -} -int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "cospi" ); -} -int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "exp" ); -} -int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "exp2" ); -} -int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "exp10" ); -} -int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "expm1" ); -} -int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fabs" ); -} -int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fdim" ); -} -int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "floor" ); -} -int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fma" ); -} -int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fmax" ); -} -int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fmin" ); -} -int test_fmod( cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fmod" ); -} -int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "fract" ); -} -int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "frexp" ); -} -int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "hypot" ); -} -int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "ilogb" ); -} -int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isequal" ); -} -int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isfinite" ); -} -int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isgreater" ); -} -int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isgreaterequal" ); -} -int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isinf" ); -} -int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isless" ); -} -int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "islessequal" ); -} -int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "islessgreater" ); -} -int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isnan" ); -} -int test_isnormal( cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements ) -{ - return doTest( "isnormal" ); -} -int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isnotequal" ); -} -int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isordered" ); -} -int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "isunordered" ); -} -int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "ldexp" ); -} -int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "lgamma" ); -} -int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "lgamma_r" ); -} -int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "log" ); -} -int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "log2" ); -} -int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "log10" ); -} -int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "log1p" ); -} -int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "logb" ); -} -int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "mad" ); -} -int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "maxmag" ); -} -int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - 
return doTest( "minmag" ); -} -int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "modf" ); -} -int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "nan" ); -} -int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "nextafter" ); -} -int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "pow" ); -} -int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "pown" ); -} -int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "powr" ); -} -int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "remainder" ); -} -int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "remquo" ); -} -int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "rint" ); -} -int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "rootn" ); -} -int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "round" ); -} -int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "rsqrt" ); -} -int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "signbit" ); -} -int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sin" ); -} -int test_sincos( cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sincos" ); -} -int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sinh" ); -} -int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sinpi" ); -} -int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sqrt" ); -} -int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "sqrt_cr" ); -} -int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "tan" ); -} -int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "tanh" ); -} -int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "tanpi" ); -} -int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "trunc" ); -} -int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_cos" ); -} -int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_divide" ); -} -int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_exp" ); -} -int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_exp2" ); -} -int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_exp10" ); -} -int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue 
queue, int num_elements ) -{ - return doTest( "half_log" ); -} -int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_log2" ); -} -int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_log10" ); -} -int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_powr" ); -} -int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_recip" ); -} -int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_rsqrt" ); -} -int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_sin" ); -} -int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_sqrt" ); -} -int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "half_tan" ); -} -int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "add" ); -} -int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "subtract" ); -} -int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "divide" ); -} -int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "divide_cr" ); -} -int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "multiply" ); -} -int test_assignment( cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements ) -{ - return doTest( "assignment" ); -} -int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) -{ - return doTest( "not" ); -} -test_definition test_list[] = { - ADD_TEST( acos ), - ADD_TEST( acosh ), - ADD_TEST( acospi ), - ADD_TEST( asin ), - ADD_TEST( asinh ), - ADD_TEST( asinpi ), - ADD_TEST( atan ), - ADD_TEST( atanh ), - ADD_TEST( atanpi ), - ADD_TEST( atan2 ), - ADD_TEST( atan2pi ), - ADD_TEST( cbrt ), - ADD_TEST( ceil ), - ADD_TEST( copysign ), - ADD_TEST( cos ), - ADD_TEST( cosh ), - ADD_TEST( cospi ), - ADD_TEST( exp ), - ADD_TEST( exp2 ), - ADD_TEST( exp10 ), - ADD_TEST( expm1 ), - ADD_TEST( fabs ), - ADD_TEST( fdim ), - ADD_TEST( floor ), - ADD_TEST( fma ), - ADD_TEST( fmax ), - ADD_TEST( fmin ), - ADD_TEST( fmod ), - ADD_TEST( fract ), - ADD_TEST( frexp ), - ADD_TEST( hypot ), - ADD_TEST( ilogb ), - ADD_TEST( isequal ), - ADD_TEST( isfinite ), - ADD_TEST( isgreater ), - ADD_TEST( isgreaterequal ), - ADD_TEST( isinf ), - ADD_TEST( isless ), - ADD_TEST( islessequal ), - ADD_TEST( islessgreater ), - ADD_TEST( isnan ), - ADD_TEST( isnormal ), - ADD_TEST( isnotequal ), - ADD_TEST( isordered ), - ADD_TEST( isunordered ), - ADD_TEST( ldexp ), - ADD_TEST( lgamma ), - ADD_TEST( lgamma_r ), - ADD_TEST( log ), - ADD_TEST( log2 ), - ADD_TEST( log10 ), - ADD_TEST( log1p ), - ADD_TEST( logb ), - ADD_TEST( mad ), - ADD_TEST( maxmag ), - ADD_TEST( minmag ), - ADD_TEST( modf ), - ADD_TEST( nan ), - ADD_TEST( nextafter ), - ADD_TEST( pow ), - ADD_TEST( pown ), - ADD_TEST( powr ), - ADD_TEST( remainder ), - ADD_TEST( remquo ), - ADD_TEST( rint ), - ADD_TEST( rootn ), - ADD_TEST( round ), - ADD_TEST( rsqrt ), - ADD_TEST( signbit ), - ADD_TEST( sin ), - ADD_TEST( sincos ), - ADD_TEST( sinh ), - ADD_TEST( sinpi ), - ADD_TEST( sqrt ), - ADD_TEST( sqrt_cr ), - ADD_TEST( tan ), - ADD_TEST( tanh ), - ADD_TEST( tanpi ), - ADD_TEST( trunc ), - ADD_TEST( half_cos ), - ADD_TEST( half_divide ), - 
ADD_TEST( half_exp ), - ADD_TEST( half_exp2 ), - ADD_TEST( half_exp10 ), - ADD_TEST( half_log ), - ADD_TEST( half_log2 ), - ADD_TEST( half_log10 ), - ADD_TEST( half_powr ), - ADD_TEST( half_recip ), - ADD_TEST( half_rsqrt ), - ADD_TEST( half_sin ), - ADD_TEST( half_sqrt ), - ADD_TEST( half_tan ), - ADD_TEST( add ), - ADD_TEST( subtract ), - ADD_TEST( divide ), - ADD_TEST( divide_cr ), - ADD_TEST( multiply ), - ADD_TEST( assignment ), - ADD_TEST( not ), +#define TEST_LAMBDA(name) \ + [](cl_device_id, cl_context, cl_command_queue, int) { \ + return doTest(#name); \ + } + +// Redefine ADD_TEST to use TEST_LAMBDA. +#undef ADD_TEST +#define ADD_TEST(name) \ + { \ + TEST_LAMBDA(name), #name, Version(1, 0) \ + } + +static test_definition test_list[] = { + ADD_TEST(acos), ADD_TEST(acosh), ADD_TEST(acospi), + ADD_TEST(asin), ADD_TEST(asinh), ADD_TEST(asinpi), + ADD_TEST(atan), ADD_TEST(atanh), ADD_TEST(atanpi), + ADD_TEST(atan2), ADD_TEST(atan2pi), ADD_TEST(cbrt), + ADD_TEST(ceil), ADD_TEST(copysign), ADD_TEST(cos), + ADD_TEST(cosh), ADD_TEST(cospi), ADD_TEST(exp), + ADD_TEST(exp2), ADD_TEST(exp10), ADD_TEST(expm1), + ADD_TEST(fabs), ADD_TEST(fdim), ADD_TEST(floor), + ADD_TEST(fma), ADD_TEST(fmax), ADD_TEST(fmin), + ADD_TEST(fmod), ADD_TEST(fract), ADD_TEST(frexp), + ADD_TEST(hypot), ADD_TEST(ilogb), ADD_TEST(isequal), + ADD_TEST(isfinite), ADD_TEST(isgreater), ADD_TEST(isgreaterequal), + ADD_TEST(isinf), ADD_TEST(isless), ADD_TEST(islessequal), + ADD_TEST(islessgreater), ADD_TEST(isnan), ADD_TEST(isnormal), + ADD_TEST(isnotequal), ADD_TEST(isordered), ADD_TEST(isunordered), + ADD_TEST(ldexp), ADD_TEST(lgamma), ADD_TEST(lgamma_r), + ADD_TEST(log), ADD_TEST(log2), ADD_TEST(log10), + ADD_TEST(log1p), ADD_TEST(logb), ADD_TEST(mad), + ADD_TEST(maxmag), ADD_TEST(minmag), ADD_TEST(modf), + ADD_TEST(nan), ADD_TEST(nextafter), ADD_TEST(pow), + ADD_TEST(pown), ADD_TEST(powr), ADD_TEST(remainder), + ADD_TEST(remquo), ADD_TEST(rint), ADD_TEST(rootn), + ADD_TEST(round), 
ADD_TEST(rsqrt), ADD_TEST(signbit), + ADD_TEST(sin), ADD_TEST(sincos), ADD_TEST(sinh), + ADD_TEST(sinpi), ADD_TEST(sqrt), ADD_TEST(sqrt_cr), + ADD_TEST(tan), ADD_TEST(tanh), ADD_TEST(tanpi), + ADD_TEST(trunc), ADD_TEST(half_cos), ADD_TEST(half_divide), + ADD_TEST(half_exp), ADD_TEST(half_exp2), ADD_TEST(half_exp10), + ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10), + ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt), + ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan), + ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide), + ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment), + ADD_TEST(not), }; -const int test_num = ARRAY_SIZE( test_list ); +#undef ADD_TEST +#undef TEST_LAMBDA + +static const int test_num = ARRAY_SIZE(test_list); #pragma mark - -int main (int argc, const char * argv[]) +int main(int argc, const char *argv[]) { int error; @@ -758,63 +307,36 @@ int main (int argc, const char * argv[]) return -1; } -#if defined( __APPLE__ ) - struct timeval startTime; - gettimeofday( &startTime, NULL ); -#endif - - error = ParseArgs( argc, argv ); - if( error ) - return error; + error = ParseArgs(argc, argv); + if (error) return error; // This takes a while, so prevent the machine from going to sleep. PreventSleep(); - atexit( ResumeSleep ); + atexit(ResumeSleep); - if( gSkipCorrectnessTesting ) - vlog( "*** Skipping correctness testing! ***\n\n" ); - else if( gStopOnError ) - vlog( "Stopping at first error.\n" ); + if (gSkipCorrectnessTesting) + vlog("*** Skipping correctness testing! 
***\n\n"); + else if (gStopOnError) + vlog("Stopping at first error.\n"); - if( gMeasureTimes ) - { - vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] ); - vlog( "\n" ); - if( gSkipCorrectnessTesting ) - vlog( " \t "); - else - vlog( " \t "); - if( gWimpyMode ) - vlog( " " ); - for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - vlog( "\t float%s", sizeNames[i] ); - } - else - { - vlog( " \t "); - if( gWimpyMode ) - vlog( " " ); - } - if( ! gSkipCorrectnessTesting ) - vlog( "\t max_ulps" ); + vlog(" \t "); + if (gWimpyMode) vlog(" "); + if (!gSkipCorrectnessTesting) vlog("\t max_ulps"); - vlog( "\n-----------------------------------------------------------------------------------------------------------\n" ); + vlog("\n-------------------------------------------------------------------" + "----------------------------------------\n"); - gMTdata = init_genrand( gRandomSeed ); - if( gEndTestNumber == 0 ) - { - gEndTestNumber = functionListCount; - } + gMTdata = init_genrand(gRandomSeed); FPU_mode_type oldMode; - DisableFTZ( &oldMode ); + DisableFTZ(&oldMode); - int ret = runTestHarnessWithCheck( gTestNameCount, gTestNames, test_num, test_list, true, 0, InitCL ); + int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(), + test_num, test_list, true, 0, InitCL); - RestoreFPState( &oldMode ); + RestoreFPState(&oldMode); free_mtdata(gMTdata); - free(gTestNames); if (gQueue) { @@ -824,116 +346,74 @@ int main (int argc, const char * argv[]) ReleaseCL(); -#if defined( __APPLE__ ) - struct timeval endTime; - gettimeofday( &endTime, NULL ); - double time = (double) endTime.tv_sec - (double) startTime.tv_sec; - time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec); - vlog( "time: %f s\n", time ); -#endif - return ret; } -static int ParseArgs( int argc, const char **argv ) +static int ParseArgs(int argc, const char **argv) { - int i; - gTestNames = (const char**) calloc( argc - 1, sizeof( 
char*) ); - if( NULL == gTestNames ) - { - vlog( "Failed to allocate memory for gTestNames array.\n" ); - return 1; - } - gTestNames[0] = argv[0]; - gTestNameCount = 1; + // We only pass test names to runTestHarnessWithCheck, hence global command + // line options defined by the harness cannot be used by the user. + // To respect the implementation details of runTestHarnessWithCheck, + // gTestNames[0] has to exist although its value is not important. + gTestNames.push_back(""); + int singleThreaded = 0; { // Extract the app name - strncpy( appName, argv[0], MAXPATHLEN ); + strncpy(appName, argv[0], MAXPATHLEN); -#if defined( __APPLE__ ) +#if defined(__APPLE__) char baseName[MAXPATHLEN]; char *base = NULL; - strncpy( baseName, argv[0], MAXPATHLEN ); - base = basename( baseName ); - if( NULL != base ) + strncpy(baseName, argv[0], MAXPATHLEN); + base = basename(baseName); + if (NULL != base) { - strncpy( appName, base, sizeof( appName ) ); - appName[ sizeof( appName ) -1 ] = '\0'; + strncpy(appName, base, sizeof(appName)); + appName[sizeof(appName) - 1] = '\0'; } #endif } - vlog( "\n%s\t", appName ); - for( i = 1; i < argc; i++ ) + vlog("\n%s\t", appName); + for (int i = 1; i < argc; i++) { const char *arg = argv[i]; - if( NULL == arg ) - break; + if (NULL == arg) break; - vlog( "\t%s", arg ); + vlog("\t%s", arg); int optionFound = 0; - if( arg[0] == '-' ) + if (arg[0] == '-') { - while( arg[1] != '\0' ) + while (arg[1] != '\0') { arg++; optionFound = 1; - switch( *arg ) + switch (*arg) { - case 'a': - gReportAverageTimes ^= 1; - break; - - case 'c': - gToggleCorrectlyRoundedDivideSqrt ^= 1; - break; + case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break; - case 'd': - gHasDouble ^= 1; - break; + case 'd': gHasDouble ^= 1; break; - case 'e': - gFastRelaxedDerived ^= 1; - break; + case 'e': gFastRelaxedDerived ^= 1; break; - case 'f': - gTestFloat ^= 1; - break; - - case 'h': - PrintUsage(); - return -1; + case 'f': gTestFloat ^= 1; break; - case 'p': - 
PrintFunctions(); - return -1; + case 'h': PrintUsage(); return -1; - case 'l': - gSkipCorrectnessTesting ^= 1; - break; + case 'p': PrintFunctions(); return -1; - case 'm': - singleThreaded ^= 1; - break; + case 'l': gSkipCorrectnessTesting ^= 1; break; - case 'r': - gTestFastRelaxed ^= 1; - break; + case 'm': singleThreaded ^= 1; break; - case 's': - gStopOnError ^= 1; - break; + case 'r': gTestFastRelaxed ^= 1; break; - case 't': - gMeasureTimes ^= 1; - break; + case 's': gStopOnError ^= 1; break; - case 'v': - gVerboseBruteForce ^= 1; - break; + case 'v': gVerboseBruteForce ^= 1; break; - case 'w': // wimpy mode + case 'w': // wimpy mode gWimpyMode ^= 1; break; @@ -941,12 +421,10 @@ static int ParseArgs( int argc, const char **argv ) parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; - case 'z': - gForceFTZ ^= 1; - break; + case 'z': gForceFTZ ^= 1; break; case '1': - if( arg[1] == '6' ) + if (arg[1] == '6') { gMinVectorSizeIndex = 5; gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; @@ -959,179 +437,194 @@ static int ParseArgs( int argc, const char **argv ) } break; case '2': - gMinVectorSizeIndex = 1; - gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; - break; + gMinVectorSizeIndex = 1; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; case '3': - gMinVectorSizeIndex = 2; - gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; - break; + gMinVectorSizeIndex = 2; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; case '4': - gMinVectorSizeIndex = 3; - gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; - break; + gMinVectorSizeIndex = 3; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; case '8': - gMinVectorSizeIndex = 4; - gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; - break; + gMinVectorSizeIndex = 4; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; break; default: - vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg); PrintUsage(); return -1; } } } - if( ! 
optionFound ) + if (!optionFound) { char *t = NULL; - long number = strtol( arg, &t, 0 ); - if( t != arg ) + long number = strtol(arg, &t, 0); + if (t != arg) { - if( 0 == gStartTestNumber ) - gStartTestNumber = (int32_t) number; + if (-1 == gStartTestNumber) + gStartTestNumber = (int32_t)number; else - gEndTestNumber = gStartTestNumber + (int32_t) number; + gEndTestNumber = gStartTestNumber + (int32_t)number; } else { // Make sure this is a valid name unsigned int k; - for (k=0; kname) == 0) { - gTestNames[ gTestNameCount ] = arg; - gTestNameCount++; + gTestNames.push_back(arg); break; } } // If we didn't find it in the list of test names if (k >= functionListCount) { - gTestNames[gTestNameCount] = arg; - gTestNameCount++; + gTestNames.push_back(arg); } } } } // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) { - vlog( "\n" ); - vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); - gWimpyMode = 1; + if (getenv("CL_WIMPY_MODE")) + { + vlog("\n"); + vlog("*** Detected CL_WIMPY_MODE env ***\n"); + gWimpyMode = 1; } - vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ ); + vlog("\nTest binary built %s %s\n", __DATE__, __TIME__); PrintArch(); - if( gWimpyMode ) + if (gWimpyMode) { - vlog( "\n" ); - vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); - vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); - vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor ); + vlog("\n"); + vlog("*** WARNING: Testing in Wimpy mode! ***\n"); + vlog("*** Wimpy mode is not sufficient to verify correctness. 
***\n"); + vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", + gWimpyReductionFactor); } - if( singleThreaded ) - SetThreadCount(1); + if (singleThreaded) SetThreadCount(1); return 0; } -static void PrintFunctions ( void ) +static void PrintFunctions(void) { - vlog( "\nMath function names:\n" ); - for( int i = 0; i < functionListCount; i++ ) - { - vlog( "\t%s\n", functionList[ i ].name ); - } + vlog("\nMath function names:\n"); + for (int i = 0; i < functionListCount; i++) + { + vlog("\t%s\n", functionList[i].name); + } } -static void PrintUsage( void ) -{ - vlog( "%s [-acglstz]: \n", appName ); - vlog( "\toptions:\n" ); - vlog( "\t\t-a\tReport average times instead of best times\n" ); - vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n"); - vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" ); - vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" ); - vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" ); - vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" ); - vlog( "\t\t-h\tPrint this message and quit\n" ); - vlog( "\t\t-p\tPrint all math function names and quit\n" ); - vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" ); - vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" ); - vlog( "\t\t-s\tStop on error\n" ); - vlog( "\t\t-t\tToggle timing (on by default)\n" ); - vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n"); - vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor ); - vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" ); - vlog( "\t\t-v\tToggle Verbosity (Default: off)\n "); - vlog( "\t\t-#\tTest only vector sizes #, e.g. 
\"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" ); - vlog( "\n\tYou may also pass a number instead of a function name.\n" ); - vlog( "\tThis causes the first N tests to be skipped. The tests are numbered.\n" ); - vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" ); - vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" ); - vlog( "\tonly the named cases in the number range will run.\n" ); - vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" ); - vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" ); - vlog( "\n" ); +static void PrintUsage(void) +{ + vlog("%s [-cglsz]: \n", appName); + vlog("\toptions:\n"); + vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: " + "off)\n"); + vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 " + "on)\n"); + vlog("\t\t-f\tToggle float precision testing. (Default: on)\n"); + vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n"); + vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math " + "precision. (Default: on)\n"); + vlog("\t\t-h\tPrint this message and quit\n"); + vlog("\t\t-p\tPrint all math function names and quit\n"); + vlog("\t\t-l\tlink check only (make sure functions are present, skip " + "accuracy checks.)\n"); + vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n"); + vlog("\t\t-s\tStop on error\n"); + vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n"); + vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " + "1-10, default factor(%u)\n", + gWimpyReductionFactor); + vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by " + "device capabilities by default.)\n"); + vlog("\t\t-v\tToggle Verbosity (Default: off)\n "); + vlog("\t\t-#\tTest only vector sizes #, e.g. 
\"-1\" tests scalar only, " + "\"-16\" tests 16-wide vectors only.\n"); + vlog("\n\tYou may also pass a number instead of a function name.\n"); + vlog("\tThis causes the first N tests to be skipped. The tests are " + "numbered.\n"); + vlog("\tIf you pass a second number, that is the number tests to run after " + "the first one.\n"); + vlog("\tA name list may be used in conjunction with a number range. In " + "that case,\n"); + vlog("\tonly the named cases in the number range will run.\n"); + vlog("\tYou may also choose to pass no arguments, in which case all tests " + "will be run.\n"); + vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the " + "device.\n"); + vlog("\n"); } -static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, + const void *private_info, + size_t cb, void *user_data) { - vlog( "%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data ); + vlog("%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data); } -test_status InitCL( cl_device_id device ) +test_status InitCL(cl_device_id device) { int error; uint32_t i; - size_t configSize = sizeof( gComputeDevices ); cl_device_type device_type; - error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL ); - if( error ) + error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), + &device_type, NULL); + if (error) { - print_error( error, "Unable to get device type" ); + print_error(error, "Unable to get device type"); return TEST_FAIL; } gDevice = device; - if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) ) - gComputeDevices = 1; // Check extensions - if(is_extension_available(gDevice, "cl_khr_fp64")) + if (is_extension_available(gDevice, "cl_khr_fp64")) { gHasDouble ^= 1; -#if defined( CL_DEVICE_DOUBLE_FP_CONFIG ) - if( (error = 
clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL))) +#if defined(CL_DEVICE_DOUBLE_FP_CONFIG) + cl_device_fp_config doubleCapabilities = 0; + if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, + sizeof(doubleCapabilities), + &doubleCapabilities, NULL))) { - vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", error ); + vlog_error("ERROR: Unable to get device " + "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", + error); return TEST_FAIL; } - if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) ) + if (DOUBLE_REQUIRED_FEATURES + != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES)) { std::string list; - if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, "; - if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) ) + if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, "; + if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST)) list += "CL_FP_ROUND_TO_NEAREST, "; - if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) ) + if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO)) list += "CL_FP_ROUND_TO_ZERO, "; - if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) ) + if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF)) list += "CL_FP_ROUND_TO_INF, "; - if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) ) + if (0 == (doubleCapabilities & CL_FP_INF_NAN)) list += "CL_FP_INF_NAN, "; - if( 0 == (gDoubleCapabilities & CL_FP_DENORM) ) + if (0 == (doubleCapabilities & CL_FP_DENORM)) list += "CL_FP_DENORM, "; vlog_error("ERROR: required double features are missing: %s\n", list.c_str()); @@ -1139,100 +632,102 @@ test_status InitCL( cl_device_id device ) return TEST_FAIL; } #else - vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" ); + vlog_error("FAIL: device says it supports cl_khr_fp64 but " + "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n"); return TEST_FAIL; #endif } - 
configSize = sizeof( gDeviceFrequency ); - if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) ) - gDeviceFrequency = 0; + uint32_t deviceFrequency = 0; + size_t configSize = sizeof(deviceFrequency); + if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, + configSize, &deviceFrequency, NULL))) + deviceFrequency = 0; - if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL))) + if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, + sizeof(gFloatCapabilities), + &gFloatCapabilities, NULL))) { - vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", error ); + vlog_error( + "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", + error); return TEST_FAIL; } - gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error ); - if( NULL == gContext || error ) + gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback, + NULL, &error); + if (NULL == gContext || error) { - vlog_error( "clCreateContext failed. (%d) \n", error ); + vlog_error("clCreateContext failed. (%d) \n", error); return TEST_FAIL; } gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == gQueue || error ) + if (NULL == gQueue || error) { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + vlog_error("clCreateCommandQueue failed. (%d)\n", error); return TEST_FAIL; } -#if defined( __APPLE__ ) - // FIXME: use clProtectedArray -#endif - //Allocate buffers + // Allocate buffers cl_uint min_alignment = 0; - error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL); + error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, + sizeof(cl_uint), (void *)&min_alignment, NULL); if (CL_SUCCESS != error) { - vlog_error( "clGetDeviceInfo failed. (%d)\n", error ); + vlog_error("clGetDeviceInfo failed. 
(%d)\n", error); return TEST_FAIL; } - min_alignment >>= 3; // convert bits to bytes - - gIn = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gIn ) - return TEST_FAIL; - gIn2 = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gIn2 ) - return TEST_FAIL; - gIn3 = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gIn3 ) - return TEST_FAIL; - gOut_Ref = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gOut_Ref ) - return TEST_FAIL; - gOut_Ref2 = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gOut_Ref2 ) - return TEST_FAIL; - - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + min_alignment >>= 3; // convert bits to bytes + + gIn = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gIn) return TEST_FAIL; + gIn2 = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gIn2) return TEST_FAIL; + gIn3 = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gIn3) return TEST_FAIL; + gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gOut_Ref) return TEST_FAIL; + gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gOut_Ref2) return TEST_FAIL; + + for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - gOut[i] = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gOut[i] ) - return TEST_FAIL; - gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment ); - if( NULL == gOut2[i] ) - return TEST_FAIL; + gOut[i] = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gOut[i]) return TEST_FAIL; + gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment); + if (NULL == gOut2[i]) return TEST_FAIL; } cl_mem_flags device_flags = CL_MEM_READ_ONLY; // save a copy on the host device to make this go faster - if( CL_DEVICE_TYPE_CPU == device_type ) + if (CL_DEVICE_TYPE_CPU == device_type) device_flags |= CL_MEM_USE_HOST_PTR; - else - device_flags |= CL_MEM_COPY_HOST_PTR; + else + device_flags |= CL_MEM_COPY_HOST_PTR; // setup input buffers - gInBuffer = clCreateBuffer(gContext, 
device_flags, BUFFER_SIZE, gIn, &error); - if( gInBuffer == NULL || error ) + gInBuffer = + clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error); + if (gInBuffer == NULL || error) { - vlog_error( "clCreateBuffer1 failed for input (%d)\n", error ); + vlog_error("clCreateBuffer1 failed for input (%d)\n", error); return TEST_FAIL; } - gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error ); - if( gInBuffer2 == NULL || error ) + gInBuffer2 = + clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error); + if (gInBuffer2 == NULL || error) { - vlog_error( "clCreateArray2 failed for input (%d)\n" , error ); + vlog_error("clCreateBuffer2 failed for input (%d)\n", error); return TEST_FAIL; } - gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error ); - if( gInBuffer3 == NULL || error) + gInBuffer3 = + clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error); + if (gInBuffer3 == NULL || error) { - vlog_error( "clCreateArray3 failed for input (%d)\n", error ); + vlog_error("clCreateBuffer3 failed for input (%d)\n", error); return TEST_FAIL; } @@ -1240,38 +735,40 @@ test_status InitCL( cl_device_id device ) // setup output buffers device_flags = CL_MEM_READ_WRITE; // save a copy on the host device to make this go faster - if( CL_DEVICE_TYPE_CPU == device_type ) + if (CL_DEVICE_TYPE_CPU == device_type) device_flags |= CL_MEM_USE_HOST_PTR; - else - device_flags |= CL_MEM_COPY_HOST_PTR; - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + else + device_flags |= CL_MEM_COPY_HOST_PTR; + for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error ); - if( gOutBuffer[i] == NULL || error ) + gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, + gOut[i], &error); + if (gOutBuffer[i] == NULL || error) { - vlog_error( "clCreateArray failed for output (%d)\n", error ); + vlog_error("clCreateBuffer 
failed for output (%d)\n", error); return TEST_FAIL; } - gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error ); - if( gOutBuffer2[i] == NULL || error) + gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, + gOut2[i], &error); + if (gOutBuffer2[i] == NULL || error) { - vlog_error( "clCreateArray2 failed for output (%d)\n", error ); + vlog_error("clCreateBuffer2 failed for output (%d)\n", error); return TEST_FAIL; } } // we are embedded, check current rounding mode - if( gIsEmbedded ) + if (gIsEmbedded) { gIsInRTZMode = IsInRTZMode(); } - //Check tininess detection + // Check tininess detection IsTininessDetectedBeforeRounding(); cl_platform_id platform; int err = clGetPlatformIDs(1, &platform, NULL); - if( err ) + if (err) { print_error(err, "clGetPlatformIDs failed"); return TEST_FAIL; @@ -1279,78 +776,97 @@ test_status InitCL( cl_device_id device ) char c[1024]; static const char *no_yes[] = { "NO", "YES" }; - vlog( "\nCompute Device info:\n" ); + vlog("\nCompute Device info:\n"); clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL); - vlog( "\tPlatform Version: %s\n", c ); + vlog("\tPlatform Version: %s\n", c); clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL); - vlog( "\tDevice Name: %s\n", c ); + vlog("\tDevice Name: %s\n", c); clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL); - vlog( "\tVendor: %s\n", c ); + vlog("\tVendor: %s\n", c); clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL); - vlog( "\tDevice Version: %s\n", c ); + vlog("\tDevice Version: %s\n", c); clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL); - vlog( "\tCL C Version: %s\n", c ); + vlog("\tCL C Version: %s\n", c); clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL); - vlog( "\tDriver Version: %s\n", c ); - vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency ); - vlog( "\tSubnormal values supported for floats? 
%s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] ); - vlog( "\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] ); - if( gToggleCorrectlyRoundedDivideSqrt ) + vlog("\tDriver Version: %s\n", c); + vlog("\tDevice Frequency: %d MHz\n", deviceFrequency); + vlog("\tSubnormal values supported for floats? %s\n", + no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]); + vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n", + no_yes[0 + != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); + if (gToggleCorrectlyRoundedDivideSqrt) { gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; } - vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] ); - vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] ); - vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] ); - vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] ); - if(gTestFastRelaxed) + vlog("\tTesting with correctly rounded float divide and sqrt? %s\n", + no_yes[0 + != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); + vlog("\tTesting with FTZ mode ON for floats? %s\n", + no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]); + vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]); + vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]); + if (gTestFastRelaxed) { - vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] ); + vlog("\tFast relaxed math has derived implementations? %s\n", + no_yes[0 != gFastRelaxedDerived]); } - vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] ); - if( sizeof( long double) == sizeof( double ) && gHasDouble ) + vlog("\tTesting double precision? 
%s\n", no_yes[0 != gHasDouble]); + if (sizeof(long double) == sizeof(double) && gHasDouble) { - vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" ); - vlog( "\t\t All double results that do not match the reference result have their reported\n" ); - vlog( "\t\t error inflated by 0.5 ulps to account for the fact that this system\n" ); - vlog( "\t\t can not accurately represent the right result to an accuracy closer\n" ); - vlog( "\t\t than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" ); + vlog("\n\t\tWARNING: Host system long double does not have better " + "precision than double!\n"); + vlog("\t\t All double results that do not match the reference " + "result have their reported\n"); + vlog("\t\t error inflated by 0.5 ulps to account for the fact " + "that this system\n"); + vlog("\t\t can not accurately represent the right result to an " + "accuracy closer\n"); + vlog("\t\t than half an ulp. See comments in " + "Bruteforce_Ulp_Error_Double() for more details.\n\n"); } - vlog( "\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded] ); - if( gIsEmbedded ) - vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] ); - vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] ); - vlog( "\tWorker threads: %d\n", GetThreadCount() ); - vlog( "\tTesting vector sizes:" ); - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - vlog( "\t%d", sizeValues[i] ); + vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]); + if (gIsEmbedded) + vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]); + vlog("\tTininess is detected before rounding? %s\n", + no_yes[0 != gCheckTininessBeforeRounding]); + vlog("\tWorker threads: %d\n", GetThreadCount()); + vlog("\tTesting vector sizes:"); + for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + vlog("\t%d", sizeValues[i]); vlog("\n"); vlog("\tVerbose? 
%s\n", no_yes[0 != gVerboseBruteForce]); - vlog( "\n\n" ); + vlog("\n\n"); - // Check to see if we are using single threaded mode on other than a 1.0 device - if (getenv( "CL_TEST_SINGLE_THREADED" )) { + // Check to see if we are using single threaded mode on other than a 1.0 + // device + if (getenv("CL_TEST_SINGLE_THREADED")) + { - char device_version[1024] = { 0 }; - clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL ); + char device_version[1024] = { 0 }; + clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version), + device_version, NULL); - if (strcmp("OpenCL 1.0 ",device_version)) { - vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n"); - } + if (strcmp("OpenCL 1.0 ", device_version)) + { + vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. " + "Running single threaded.\n"); + } } return TEST_PASS; } -static void ReleaseCL( void ) +static void ReleaseCL(void) { uint32_t i; clReleaseMemObject(gInBuffer); clReleaseMemObject(gInBuffer2); clReleaseMemObject(gInBuffer3); - for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { + for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { clReleaseMemObject(gOutBuffer[i]); clReleaseMemObject(gOutBuffer2[i]); } @@ -1363,128 +879,148 @@ static void ReleaseCL( void ) align_free(gOut_Ref); align_free(gOut_Ref2); - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { align_free(gOut[i]); align_free(gOut2[i]); } } -void _LogBuildError( cl_program p, int line, const char *file ) +void _LogBuildError(cl_program p, int line, const char *file) { char the_log[2048] = ""; - vlog_error( "%s:%d: Build Log:\n", file, line ); - if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) ) - vlog_error( "%s", the_log ); + vlog_error("%s:%d: Build Log:\n", file, line); + if (0 + == clGetProgramBuildInfo(p, 
gDevice, CL_PROGRAM_BUILD_LOG, + sizeof(the_log), the_log, NULL)) + vlog_error("%s", the_log); else - vlog_error( "*** Error getting build log for program %p\n", p ); + vlog_error("*** Error getting build log for program %p\n", p); } -int InitILogbConstants( void ) +int InitILogbConstants(void) { int error; - const char *kernel = - "__kernel void GetILogBConstants( __global int *out )\n" - "{\n" - " out[0] = FP_ILOGB0;\n" - " out[1] = FP_ILOGBNAN;\n" - "}\n"; - - cl_program query; - error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); - if (NULL == query || error) - { - vlog_error( "Error: Unable to create program to get FP_ILOGB0 and FP_ILOGBNAN for the device. (%d)", error ); - return error; - } - - cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error ); - if( NULL == k || error) + const char *kernelSource = + R"(__kernel void GetILogBConstants( __global int *out ) + { + out[0] = FP_ILOGB0; + out[1] = FP_ILOGBNAN; + })"; + + clProgramWrapper query; + clKernelWrapper kernel; + error = create_single_kernel_helper(gContext, &query, &kernel, 1, + &kernelSource, "GetILogBConstants"); + if (error != CL_SUCCESS) { - vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and " + "FP_ILOGBNAN for the device. (%d)", + error); return error; } - if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + if ((error = + clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]), + &gOutBuffer[gMinVectorSizeIndex]))) { - vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and " + "FP_ILOGBNAN for the device. 
Err = %d", + error); return error; } size_t dim = 1; - if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0, + NULL, NULL))) { - vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and " + "FP_ILOGBNAN for the device. Err = %d", + error); return error; } - struct{ cl_int ilogb0, ilogbnan; }data; - if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + struct + { + cl_int ilogb0, ilogbnan; + } data; + if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex], + CL_TRUE, 0, sizeof(data), &data, 0, NULL, + NULL))) { - vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error ); + vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the " + "device. Err = %d", + error); return error; } gDeviceILogb0 = data.ilogb0; gDeviceILogbNaN = data.ilogbnan; - clReleaseKernel(k); - clReleaseProgram(query); - return 0; } -int IsTininessDetectedBeforeRounding( void ) +int IsTininessDetectedBeforeRounding(void) { int error; - const char *kernel = - "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n" - "{\n" - " volatile float a = 0x1.000002p-126f;\n" - " volatile float b = 0x1.fffffcp-1f;\n" // product is 0x1.fffffffffff8p-127 - " out[0] = a * b;\n" - "}\n"; - - cl_program query; - error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); - if (error != CL_SUCCESS) { - vlog_error( "Error: Unable to create program to detect how tininess is detected for the device. 
(%d)", error ); - return error; - } - - cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error ); - if( NULL == k || error) + const char *kernelSource = + R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out ) + { + volatile float a = 0x1.000002p-126f; + volatile float b = 0x1.fffffcp-1f; + out[0] = a * b; // product is 0x1.fffffffffff8p-127 + })"; + + clProgramWrapper query; + clKernelWrapper kernel; + error = + create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource, + "IsTininessDetectedBeforeRounding"); + if (error != CL_SUCCESS) { - vlog_error( "Error: Unable to create kernel to detect how tininess is detected for the device. Err = %d", error ); + vlog_error("Error: Unable to create kernel to detect how tininess is " + "detected for the device. (%d)", + error); return error; } - if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + if ((error = + clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]), + &gOutBuffer[gMinVectorSizeIndex]))) { - vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected for the device. Err = %d", error ); + vlog_error("Error: Unable to set kernel arg to detect how tininess is " + "detected for the device. Err = %d", + error); return error; } size_t dim = 1; - if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0, + NULL, NULL))) { - vlog_error( "Error: Unable to execute kernel to detect how tininess is detected for the device. Err = %d", error ); + vlog_error("Error: Unable to execute kernel to detect how tininess is " + "detected for the device. 
Err = %d", + error); return error; } - struct{ cl_uint f; }data; - if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + struct { - vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error ); + cl_uint f; + } data; + if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex], + CL_TRUE, 0, sizeof(data), &data, 0, NULL, + NULL))) + { + vlog_error("Error: unable to read result from tininess test from the " + "device. Err = %d", + error); return error; } gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff); - clReleaseKernel(k); - clReleaseProgram(query); - return 0; } @@ -1495,32 +1031,21 @@ int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, int error = 0; char options[200] = ""; - if( gForceFTZ ) + if (gForceFTZ) { - strcat(options," -cl-denorms-are-zero"); + strcat(options, " -cl-denorms-are-zero"); } if (relaxedMode) { - strcat(options, " -cl-fast-relaxed-math"); + strcat(options, " -cl-fast-relaxed-math"); } - error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); + error = + create_single_kernel_helper(gContext, p, k, count, c, name, options); if (error != CL_SUCCESS) { - vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error); - return error; - } - - *k = clCreateKernel( *p, name, &error ); - if( NULL == *k || error ) - { - char buffer[2048] = ""; - - vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); - clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); - vlog_error("Log: %s\n", buffer); - clReleaseProgram( *p ); + vlog_error("\t\tFAILED -- Failed to create kernel. 
(%d)\n", error); return error; } @@ -1531,45 +1056,43 @@ int MakeKernels(const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p, bool relaxedMode) { - int error = 0; - cl_uint i; char options[200] = ""; if (gForceFTZ) { - strcat(options," -cl-denorms-are-zero "); + strcat(options, " -cl-denorms-are-zero "); } - if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) + if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { - strcat(options," -cl-fp32-correctly-rounded-divide-sqrt "); + strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt "); } if (relaxedMode) { - strcat(options, " -cl-fast-relaxed-math"); + strcat(options, " -cl-fast-relaxed-math"); } - error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); - if ( error != CL_SUCCESS ) + int error = + create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); + if (error != CL_SUCCESS) { - vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error ); + vlog_error("\t\tFAILED -- Failed to create program. 
(%d)\n", error); return error; } - - memset( k, 0, kernel_count * sizeof( *k) ); - for( i = 0; i< kernel_count; i++ ) + for (cl_uint i = 0; i < kernel_count; i++) { - k[i] = clCreateKernel( *p, name, &error ); - if( NULL == k[i]|| error ) + k[i] = clCreateKernel(*p, name, &error); + if (NULL == k[i] || error) { - char buffer[2048] = ""; + char buffer[2048] = ""; vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); - clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); + clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, + sizeof(buffer), buffer, NULL); vlog_error("Log: %s\n", buffer); - clReleaseProgram( *p ); + clReleaseProgram(*p); return error; } } @@ -1578,99 +1101,116 @@ int MakeKernels(const char **c, cl_uint count, const char *name, } -static int IsInRTZMode( void ) +static int IsInRTZMode(void) { int error; - const char *kernel = - "__kernel void GetRoundingMode( __global int *out )\n" - "{\n" - " volatile float a = 0x1.0p23f;\n" - " volatile float b = -0x1.0p23f;\n" - " out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n" - "}\n"; - - cl_program query; - error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); - if (error != CL_SUCCESS) { - vlog_error( "Error: Unable to create program to detect RTZ mode for the device. (%d)", error ); - return error; - } - - cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error ); - if( NULL == k || error) + const char *kernelSource = + R"(__kernel void GetRoundingMode( __global int *out ) + { + volatile float a = 0x1.0p23f; + volatile float b = -0x1.0p23f; + out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b); + })"; + + clProgramWrapper query; + clKernelWrapper kernel; + error = create_single_kernel_helper(gContext, &query, &kernel, 1, + &kernelSource, "GetRoundingMode"); + if (error != CL_SUCCESS) { - vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. 
Err = %d", error ); + vlog_error("Error: Unable to create kernel to detect RTZ mode for the " + "device. (%d)", + error); return error; } - if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + if ((error = + clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]), + &gOutBuffer[gMinVectorSizeIndex]))) { - vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. Err = %d", error ); + vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the " + "device. Err = %d", + error); return error; } size_t dim = 1; - if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0, + NULL, NULL))) { - vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error ); + vlog_error("Error: Unable to execute kernel to detect RTZ mode for the " + "device. Err = %d", + error); return error; } - struct{ cl_int isRTZ; }data; - if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + struct { - vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error ); + cl_int isRTZ; + } data; + if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex], + CL_TRUE, 0, sizeof(data), &data, 0, NULL, + NULL))) + { + vlog_error( + "Error: unable to read RTZ mode data from the device. 
Err = %d", + error); return error; } - clReleaseKernel(k); - clReleaseProgram(query); - return data.isRTZ; } #pragma mark - -const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" }; -const int sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - -// TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c -float Bruteforce_Ulp_Error_Double( double test, long double reference ) -{ -//Check for Non-power-of-two and NaN - - // Note: This function presumes that someone has already tested whether the result is correctly, - // rounded before calling this function. That test: - // - // if( (float) reference == test ) - // return 0.0f; - // - // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. - // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded - // results. - - // Deal with long double = double - // On most systems long double is a higher precision type than double. They provide either - // a 80-bit or greater floating point type, or they provide a head-tail double double format. - // That is sufficient to represent the accuracy of a floating point result to many more bits - // than double and we can calculate sub-ulp errors. This is the standard system for which this - // test suite is designed. - // - // On some systems double and long double are the same thing. Then we run into a problem, - // because our representation of the infinitely precise result (passed in as reference above) - // can be off by as much as a half double precision ulp itself. In this case, we inflate the - // reported error by half an ulp to take this into account. 
A more correct and permanent fix - // would be to undertake refactoring the reference code to return results in this format: - // - // typedef struct DoubleReference - // { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise) - // double correctlyRoundedResult; // as best we can - // double ulps; // plus a fractional amount to account for the difference - // }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps. - // - // This would provide a useful higher-than-double precision format for everyone that we can use, - // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems - // that use a head to tail double double for long double. +const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" }; +const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; + +// TODO: There is another version of Ulp_Error_Double defined in +// test_common/harness/errorHelpers.c +float Bruteforce_Ulp_Error_Double(double test, long double reference) +{ + // Check for Non-power-of-two and NaN + + // Note: This function presumes that someone has already tested whether the + // result is correctly, rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out + // before we get here. Otherwise, we'll return inf ulp error here, for what + // are otherwise correctly rounded results. + + // Deal with long double = double + // On most systems long double is a higher precision type than double. They + // provide either a 80-bit or greater floating point type, or they provide a + // head-tail double double format. That is sufficient to represent the + // accuracy of a floating point result to many more bits than double and we + // can calculate sub-ulp errors. 
This is the standard system for which this + // test suite is designed. + // + // On some systems double and long double are the same thing. Then we run + // into a problem, because our representation of the infinitely precise + // result (passed in as reference above) can be off by as much as a half + // double precision ulp itself. In this case, we inflate the reported error + // by half an ulp to take this into account. A more correct and permanent + // fix would be to undertake refactoring the reference code to return + // results in this format: + // + // typedef struct DoubleReference + // { // true value = correctlyRoundedResult + ulps * + // ulp(correctlyRoundedResult) (infinitely precise) + // double correctlyRoundedResult; // as best we can + // double ulps; // plus a fractional amount to + // account for the difference + // }DoubleReference; // between infinitely + // precise result and correctlyRoundedResult, in units of ulps. + // + // This would provide a useful higher-than-double precision format for + // everyone that we can use, and would solve a few problems with + // representing absolute errors below DBL_MIN and over DBL_MAX for systems + // that use a head to tail double double for long double. int x; long double testVal = test; @@ -1678,119 +1218,73 @@ float Bruteforce_Ulp_Error_Double( double test, long double reference ) // First, handle special reference values if (isinf(reference)) { - if (reference == testVal) - return 0.0f; + if (reference == testVal) return 0.0f; - return INFINITY; + return INFINITY; } if (isnan(reference)) { - if (isnan(testVal)) - return 0.0f; + if (isnan(testVal)) return 0.0f; - return INFINITY; + return INFINITY; } - if ( 0.0L != reference && 0.5L != frexpl(reference, &x) ) + if (0.0L != reference && 0.5L != frexpl(reference, &x)) { // Non-zero and Non-power of two - // allow correctly rounded results to pass through unmolested. (We might add error to it below.) 
- // There is something of a performance optimization here. - if( testVal == reference ) - return 0.0f; + // allow correctly rounded results to pass through unmolested. (We might + // add error to it below.) There is something of a performance + // optimization here. + if (testVal == reference) return 0.0f; // The unbiased exponent of the ulp unit place - int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 ); + int ulp_exp = + DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error - float result = (float) scalbnl( testVal - reference, ulp_exp ); + float result = (float)scalbnl(testVal - reference, ulp_exp); - // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above) - if( sizeof(long double) == sizeof( double ) ) - result += copysignf( 0.5f, result); + // account for rounding error in reference result on systems that do not + // have a higher precision floating point type (see above) + if (sizeof(long double) == sizeof(double)) + result += copysignf(0.5f, result); return result; } // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 ); + int ulp_exp = + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); - // allow correctly rounded results to pass through unmolested. (We might add error to it below.) - // There is something of a performance optimization here too. - if( testVal == reference ) - return 0.0f; + // allow correctly rounded results to pass through unmolested. (We might add + // error to it below.) There is something of a performance optimization here + // too. 
+ if (testVal == reference) return 0.0f; // Scale the exponent of the error - float result = (float) scalbnl( testVal - reference, ulp_exp ); + float result = (float)scalbnl(testVal - reference, ulp_exp); - // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above) - if( sizeof(long double) == sizeof( double ) ) - result += copysignf( 0.5f, result); + // account for rounding error in reference result on systems that do not + // have a higher precision floating point type (see above) + if (sizeof(long double) == sizeof(double)) + result += copysignf(0.5f, result); return result; } -float Abs_Error( float test, double reference ) +float Abs_Error(float test, double reference) { - if( isnan(test) && isnan(reference) ) - return 0.0f; - return fabs((float)(reference-(double)test)); + if (isnan(test) && isnan(reference)) return 0.0f; + return fabs((float)(reference - (double)test)); } -#if defined( __APPLE__ ) - #include -#endif - -uint64_t GetTime( void ) -{ -#if defined( __APPLE__ ) - return mach_absolute_time(); -#elif defined(_WIN32) && defined(_MSC_VER) - return ReadTime(); -#else - //mach_absolute_time is a high precision timer with precision < 1 microsecond. - #warning need accurate clock here. Times are invalid. - return 0; -#endif -} - - -#if defined(_WIN32) && defined (_MSC_VER) -/* function is defined in "compat.h" */ -#else -double SubtractTime( uint64_t endTime, uint64_t startTime ) -{ - uint64_t diff = endTime - startTime; - static double conversion = 0.0; - - if( 0.0 == conversion ) - { -#if defined( __APPLE__ ) - mach_timebase_info_data_t info = {0,0}; - kern_return_t err = mach_timebase_info( &info ); - if( 0 == err ) - conversion = 1e-9 * (double) info.numer / (double) info.denom; -#else - // This function consumes output from GetTime() above, and converts the time to secionds. - #warning need accurate ticks to seconds conversion factor here. Times are invalid. 
-#endif - } - - // strictly speaking we should also be subtracting out timer latency here - return conversion * (double) diff; -} -#endif - -cl_uint RoundUpToNextPowerOfTwo( cl_uint x ) +cl_uint RoundUpToNextPowerOfTwo(cl_uint x) { - if( 0 == (x & (x-1))) - return x; + if (0 == (x & (x - 1))) return x; - while( x & (x-1) ) - x &= x-1; + while (x & (x - 1)) x &= x - 1; - return x+x; + return x + x; } - diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 01c99c147d..0b037e01d8 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,56 +13,55 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" + #include "reference_math.h" -#include +#include "harness/compat.h" + +#include #if !defined(_WIN32) -#include +#include #endif -#include "Utility.h" +#include "utility.h" -#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) - #include +#if defined(__SSE__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#include #endif -#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) - #include +#if defined(__SSE2__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#include #endif #ifndef M_PI_4 - #define M_PI_4 (M_PI/4) +#define M_PI_4 (M_PI / 4) #endif -#define EVALUATE( x ) x -#define CONCATENATE(x, y) x ## EVALUATE(y) - #pragma STDC FP_CONTRACT OFF static void __log2_ep(double *hi, double *lo, double x); -typedef union -{ +union uint64d_t { uint64_t i; double d; -}uint64d_t; +}; static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL }; #define cl_make_nan() _CL_NAN.d -static double reduce1( double x ); -static double reduce1( double x ) +static double reduce1(double x) { - if( fabs(x) >= HEX_DBL( +, 1, 0, +, 53 ) ) + if (fabs(x) >= HEX_DBL(+, 1, 0, +, 53)) { - if( fabs(x) == INFINITY ) - return cl_make_nan(); + if (fabs(x) == INFINITY) return cl_make_nan(); - return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs + return 0.0; // we patch up the sign for sinPi and cosPi later, since + // they need different signs } // Find the nearest multiple of 2 - const double r = copysign( HEX_DBL( +, 1, 0, +, 53 ), x ); + const double r = copysign(HEX_DBL(+, 1, 0, +, 53), x); double z = x + r; z -= r; @@ -70,384 +69,374 @@ static double reduce1( double x ) return x - z; } -/* -static double reduceHalf( double x ); -static double reduceHalf( double x ) +double reference_acospi(double x) { return reference_acos(x) / M_PI; } +double reference_asinpi(double x) { return reference_asin(x) / M_PI; } 
+double reference_atanpi(double x) { return reference_atan(x) / M_PI; } +double reference_atan2pi(double y, double x) { - if( fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) ) - { - if( fabs(x) == INFINITY ) - return cl_make_nan(); - - return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs - } - - // Find the nearest multiple of 1 - const double r = copysign( HEX_DBL( +, 1, 0, +, 52 ), x ); - double z = x + r; - z -= r; - - // subtract it from x. Value is now in the range -0.5 <= x <= 0.5 - return x - z; + return reference_atan2(y, x) / M_PI; } -*/ - -double reference_acospi( double x) { return reference_acos( x ) / M_PI; } -double reference_asinpi( double x) { return reference_asin( x ) / M_PI; } -double reference_atanpi( double x) { return reference_atan( x ) / M_PI; } -double reference_atan2pi( double y, double x ) { return reference_atan2( y, x) / M_PI; } -double reference_cospi( double x) +double reference_cospi(double x) { - if( reference_fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) ) + if (reference_fabs(x) >= HEX_DBL(+, 1, 0, +, 52)) { - if( reference_fabs(x) == INFINITY ) - return cl_make_nan(); + if (reference_fabs(x) == INFINITY) return cl_make_nan(); - //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53. - //However, when starting with single precision inputs, there will be no odd values. + // Note this probably fails for odd values between 0x1.0p52 and + // 0x1.0p53. However, when starting with single precision inputs, there + // will be no odd values. 
return 1.0; } - x = reduce1(x+0.5); + x = reduce1(x + 0.5); // reduce to [-0.5, 0.5] - if( x < -0.5 ) + if (x < -0.5) x = -1 - x; - else if ( x > 0.5 ) + else if (x > 0.5) x = 1 - x; // cosPi zeros are all +0 - if( x == 0.0 ) - return 0.0; + if (x == 0.0) return 0.0; - return reference_sin( x * M_PI ); + return reference_sin(x * M_PI); } double reference_relaxed_cospi(double x) { return reference_cospi(x); } -double reference_relaxed_divide( double x, double y ) { return (float)(((float) x ) / ( (float) y )); } +double reference_relaxed_divide(double x, double y) +{ + return (float)(((float)x) / ((float)y)); +} -double reference_divide( double x, double y ) { return x / y; } +double reference_divide(double x, double y) { return x / y; } // Add a + b. If the result modulo overflowed, write 1 to *carry, otherwise 0 -static inline cl_ulong add_carry( cl_ulong a, cl_ulong b, cl_ulong *carry ) +static inline cl_ulong add_carry(cl_ulong a, cl_ulong b, cl_ulong *carry) { cl_ulong result = a + b; *carry = result < a; return result; } -// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise 0 -static inline cl_ulong sub_carry( cl_ulong a, cl_ulong b, cl_ulong *carry ) +// Subtract a - b. 
If the result modulo overflowed, write 1 to *carry, otherwise +// 0 +static inline cl_ulong sub_carry(cl_ulong a, cl_ulong b, cl_ulong *carry) { cl_ulong result = a - b; *carry = result > a; return result; } -static float fallback_frexpf( float x, int *iptr ) +static float fallback_frexpf(float x, int *iptr) { cl_uint u, v; float fu, fv; - memcpy( &u, &x, sizeof(u)); + memcpy(&u, &x, sizeof(u)); - cl_uint exponent = u & 0x7f800000U; + cl_uint exponent = u & 0x7f800000U; cl_uint mantissa = u & ~0x7f800000U; // add 1 to the exponent exponent += 0x00800000U; - if( (cl_int) exponent < (cl_int) 0x01000000 ) + if ((cl_int)exponent < (cl_int)0x01000000) { // subnormal, NaN, Inf mantissa |= 0x3f000000U; v = mantissa & 0xff800000U; u = mantissa; - memcpy( &fv, &v, sizeof(v)); - memcpy( &fu, &u, sizeof(u)); + memcpy(&fv, &v, sizeof(v)); + memcpy(&fu, &u, sizeof(u)); fu -= fv; - memcpy( &v, &fv, sizeof(v)); - memcpy( &u, &fu, sizeof(u)); + memcpy(&v, &fv, sizeof(v)); + memcpy(&u, &fu, sizeof(u)); - exponent = u & 0x7f800000U; + exponent = u & 0x7f800000U; mantissa = u & ~0x7f800000U; - *iptr = (exponent >> 23) + (-126 + 1 -126); + *iptr = (exponent >> 23) + (-126 + 1 - 126); u = mantissa | 0x3f000000U; - memcpy( &fu, &u, sizeof(u)); + memcpy(&fu, &u, sizeof(u)); return fu; } *iptr = (exponent >> 23) - 127; u = mantissa | 0x3f000000U; - memcpy( &fu, &u, sizeof(u)); + memcpy(&fu, &u, sizeof(u)); return fu; } -static inline int extractf( float, cl_uint * ); -static inline int extractf( float x, cl_uint *mant ) +static inline int extractf(float x, cl_uint *mant) { - static float (*frexppf)(float, int*) = NULL; + static float (*frexppf)(float, int *) = NULL; int e; // verify that frexp works properly - if( NULL == frexppf ) + if (NULL == frexppf) { - if( 0.5f == frexpf( HEX_FLT( +, 1, 0, -, 130 ), &e ) && e == -129 ) + if (0.5f == frexpf(HEX_FLT(+, 1, 0, -, 130), &e) && e == -129) frexppf = frexpf; else frexppf = fallback_frexpf; } - *mant = (cl_uint) (HEX_FLT( +, 1, 0, +, 32 ) * 
fabsf( frexppf( x, &e ))); + *mant = (cl_uint)(HEX_FLT(+, 1, 0, +, 32) * fabsf(frexppf(x, &e))); return e - 1; } -// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd together and ORd into the LSB of the result -static inline void shift_right_sticky_64( cl_ulong *p, int shift ); -static inline void shift_right_sticky_64( cl_ulong *p, int shift ) +// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd +// together and ORd into the LSB of the result +static inline void shift_right_sticky_64(cl_ulong *p, int shift) { cl_ulong sticky = 0; cl_ulong r = *p; // C doesn't handle shifts greater than the size of the variable dependably - if( shift >= 64 ) + if (shift >= 64) { sticky |= (0 != r); r = 0; } else { - sticky |= (0 != (r << (64-shift))); + sticky |= (0 != (r << (64 - shift))); r >>= shift; } *p = r | sticky; } -// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd into the LSB of the result -static inline void add64( cl_ulong *p, cl_ulong c, int *exponent ); -static inline void add64( cl_ulong *p, cl_ulong c, int *exponent ) +// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd +// into the LSB of the result +static inline void add64(cl_ulong *p, cl_ulong c, int *exponent) { cl_ulong carry; c = add_carry(c, *p, &carry); - if( carry ) + if (carry) { - carry = c & 1; // set aside sticky bit - c >>= 1; // right shift to deal with overflow - c |= carry | 0x8000000000000000ULL; // or in carry bit, and sticky bit. The latter is to prevent rounding from believing we are exact half way case - *exponent = *exponent + 1; // adjust exponent + carry = c & 1; // set aside sticky bit + c >>= 1; // right shift to deal with overflow + c |= carry + | 0x8000000000000000ULL; // or in carry bit, and sticky bit. 
The + // latter is to prevent rounding from + // believing we are exact half way case + *exponent = *exponent + 1; // adjust exponent } *p = c; } // IEEE-754 round to nearest, ties to even rounding -static float round_to_nearest_even_float( cl_ulong p, int exponent ); -static float round_to_nearest_even_float( cl_ulong p, int exponent ) +static float round_to_nearest_even_float(cl_ulong p, int exponent) { - union{ cl_uint u; cl_float d;} u; + union { + cl_uint u; + cl_float d; + } u; // If mantissa is zero, return 0.0f if (p == 0) return 0.0f; // edges - if( exponent > 127 ) + if (exponent > 127) { - volatile float r = exponent * CL_FLT_MAX; // signal overflow + volatile float r = exponent * CL_FLT_MAX; // signal overflow // attempt to fool the compiler into not optimizing the above line away - if( r > CL_FLT_MAX ) - return INFINITY; + if (r > CL_FLT_MAX) return INFINITY; return r; } - if( exponent == -150 && p > 0x8000000000000000ULL) - return HEX_FLT( +, 1, 0, -, 149 ); - if( exponent <= -150 ) return 0.0f; + if (exponent == -150 && p > 0x8000000000000000ULL) + return HEX_FLT(+, 1, 0, -, 149); + if (exponent <= -150) return 0.0f; - //Figure out which bits go where + // Figure out which bits go where int shift = 8 + 32; - if( exponent < -126 ) + if (exponent < -126) { - shift -= 126 + exponent; // subnormal: shift is not 52 - exponent = -127; // set exponent to 0 + shift -= 126 + exponent; // subnormal: shift is not 52 + exponent = -127; // set exponent to 0 } else - p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove + // it. 
// Assemble the double (round toward zero) - u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23); + u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23); // put a representation of the residual bits into hi - p <<= (64-shift); + p <<= (64 - shift); - //round to nearest, ties to even based on the unused portion of p - if( p < 0x8000000000000000ULL ) return u.d; - if( p == 0x8000000000000000ULL ) u.u += u.u & 1U; - else u.u++; + // round to nearest, ties to even based on the unused portion of p + if (p < 0x8000000000000000ULL) return u.d; + if (p == 0x8000000000000000ULL) + u.u += u.u & 1U; + else + u.u++; return u.d; } -static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent ); -static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent ) +static float round_to_nearest_even_float_ftz(cl_ulong p, int exponent) { extern int gCheckTininessBeforeRounding; - union{ cl_uint u; cl_float d;} u; + union { + cl_uint u; + cl_float d; + } u; int shift = 8 + 32; // If mantissa is zero, return 0.0f if (p == 0) return 0.0f; // edges - if( exponent > 127 ) + if (exponent > 127) { - volatile float r = exponent * CL_FLT_MAX; // signal overflow + volatile float r = exponent * CL_FLT_MAX; // signal overflow // attempt to fool the compiler into not optimizing the above line away - if( r > CL_FLT_MAX ) - return INFINITY; + if (r > CL_FLT_MAX) return INFINITY; return r; } // Deal with FTZ for gCheckTininessBeforeRounding - if( exponent < (gCheckTininessBeforeRounding - 127) ) - return 0.0f; + if (exponent < (gCheckTininessBeforeRounding - 127)) return 0.0f; - if( exponent == -127 ) // only happens for machines that check tininess after rounding - p = (p&1) | (p>>1); + if (exponent + == -127) // only happens for machines that check tininess after rounding + p = (p & 1) | (p >> 1); else - p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. 
Remove + // it. cl_ulong q = p; // Assemble the double (round toward zero) - u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23); + u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23); // put a representation of the residual bits into hi - q <<= (64-shift); + q <<= (64 - shift); - //round to nearest, ties to even based on the unused portion of p - if( q > 0x8000000000000000ULL ) + // round to nearest, ties to even based on the unused portion of p + if (q > 0x8000000000000000ULL) u.u++; - else if( q == 0x8000000000000000ULL ) + else if (q == 0x8000000000000000ULL) u.u += u.u & 1U; // Deal with FTZ for ! gCheckTininessBeforeRounding - if( 0 == (u.u & 0x7f800000U ) ) - return 0.0f; + if (0 == (u.u & 0x7f800000U)) return 0.0f; return u.d; } // IEEE-754 round toward zero. -static float round_toward_zero_float( cl_ulong p, int exponent ); -static float round_toward_zero_float( cl_ulong p, int exponent ) +static float round_toward_zero_float(cl_ulong p, int exponent) { - union{ cl_uint u; cl_float d;} u; + union { + cl_uint u; + cl_float d; + } u; // If mantissa is zero, return 0.0f if (p == 0) return 0.0f; // edges - if( exponent > 127 ) + if (exponent > 127) { - volatile float r = exponent * CL_FLT_MAX; // signal overflow + volatile float r = exponent * CL_FLT_MAX; // signal overflow // attempt to fool the compiler into not optimizing the above line away - if( r > CL_FLT_MAX ) - return CL_FLT_MAX; + if (r > CL_FLT_MAX) return CL_FLT_MAX; return r; } - if( exponent <= -149 ) - return 0.0f; + if (exponent <= -149) return 0.0f; - //Figure out which bits go where + // Figure out which bits go where int shift = 8 + 32; - if( exponent < -126 ) + if (exponent < -126) { - shift -= 126 + exponent; // subnormal: shift is not 52 - exponent = -127; // set exponent to 0 + shift -= 126 + exponent; // subnormal: shift is not 52 + exponent = -127; // set exponent to 0 } else - p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. 
+ p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove + // it. // Assemble the double (round toward zero) - u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23); + u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23); return u.d; } -static float round_toward_zero_float_ftz( cl_ulong p, int exponent ); -static float round_toward_zero_float_ftz( cl_ulong p, int exponent ) +static float round_toward_zero_float_ftz(cl_ulong p, int exponent) { - extern int gCheckTininessBeforeRounding; - - union{ cl_uint u; cl_float d;} u; + union { + cl_uint u; + cl_float d; + } u; int shift = 8 + 32; // If mantissa is zero, return 0.0f if (p == 0) return 0.0f; // edges - if( exponent > 127 ) + if (exponent > 127) { - volatile float r = exponent * CL_FLT_MAX; // signal overflow + volatile float r = exponent * CL_FLT_MAX; // signal overflow // attempt to fool the compiler into not optimizing the above line away - if( r > CL_FLT_MAX ) - return CL_FLT_MAX; + if (r > CL_FLT_MAX) return CL_FLT_MAX; return r; } // Deal with FTZ for gCheckTininessBeforeRounding - if( exponent < -126 ) - return 0.0f; + if (exponent < -126) return 0.0f; - cl_ulong q = p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + cl_ulong q = p &= + 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. // Assemble the double (round toward zero) - u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23); + u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23); // put a representation of the residual bits into hi - q <<= (64-shift); + q <<= (64 - shift); return u.d; } // Subtract two significands. 
-static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC ); -static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC ) +static inline void sub64(cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC) { cl_ulong carry; - p = sub_carry( *c, p, &carry ); + p = sub_carry(*c, p, &carry); - if( carry ) + if (carry) { *signC ^= 0x80000000U; p = -p; } // normalize - if( p ) + if (p) { int shift = 32; cl_ulong test = 1ULL << 32; - while( 0 == (p & 0x8000000000000000ULL)) + while (0 == (p & 0x8000000000000000ULL)) { - if( p < test ) + if (p < test) { p <<= shift; *expC = *expC - shift; @@ -460,49 +449,60 @@ static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC ) { // zero result. *expC = -200; - *signC = 0; // IEEE rules say a - a = +0 for all rounding modes except -inf + *signC = + 0; // IEEE rules say a - a = +0 for all rounding modes except -inf } *c = p; } -float reference_fma( float a, float b, float c, int shouldFlush ) +float reference_fma(float a, float b, float c, int shouldFlush) { static const cl_uint kMSB = 0x80000000U; // Make bits accessible - union{ cl_uint u; cl_float d; } ua; ua.d = a; - union{ cl_uint u; cl_float d; } ub; ub.d = b; - union{ cl_uint u; cl_float d; } uc; uc.d = c; + union { + cl_uint u; + cl_float d; + } ua; + ua.d = a; + union { + cl_uint u; + cl_float d; + } ub; + ub.d = b; + union { + cl_uint u; + cl_float d; + } uc; + uc.d = c; // deal with Nans, infinities and zeros - if( isnan( a ) || isnan( b ) || isnan(c) || - isinf( a ) || isinf( b ) || isinf(c) || - 0 == ( ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior - 0 == ( ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior - 0 == ( uc.u & ~kMSB) ) // c == 0, defeat host FTZ behavior + if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c) + || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior + 0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior + 0 == (uc.u & ~kMSB)) // c == 0, defeat host 
FTZ behavior { FPU_mode_type oldMode; RoundingMode oldRoundMode = kRoundToNearestEven; - if( isinf( c ) && !isinf(a) && !isinf(b) ) - return (c + a) + b; + if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b; - if (gIsInRTZMode) - oldRoundMode = set_round(kRoundTowardZero, kfloat); + if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); - memset( &oldMode, 0, sizeof( oldMode ) ); - if( shouldFlush ) - ForceFTZ( &oldMode ); + memset(&oldMode, 0, sizeof(oldMode)); + if (shouldFlush) ForceFTZ(&oldMode); - a = (float) reference_multiply( a, b ); // some risk that the compiler will insert a non-compliant fma here on some platforms. - a = (float) reference_add( a, c ); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. + a = (float)reference_multiply( + a, b); // some risk that the compiler will insert a non-compliant + // fma here on some platforms. + a = (float)reference_add( + a, + c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. - if( shouldFlush ) - RestoreFPState( &oldMode ); + if (shouldFlush) RestoreFPState(&oldMode); - if( gIsInRTZMode ) - set_round(oldRoundMode, kfloat); + if (gIsInRTZMode) set_round(oldRoundMode, kfloat); return a; } @@ -510,67 +510,70 @@ float reference_fma( float a, float b, float c, int shouldFlush ) // exponent is a standard unbiased signed integer // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB cl_uint mantA, mantB, mantC; - int expA = extractf( a, &mantA ); - int expB = extractf( b, &mantB ); - int expC = extractf( c, &mantC ); - cl_uint signC = uc.u & kMSB; // We'll need the sign bit of C later to decide if we are adding or subtracting + int expA = extractf(a, &mantA); + int expB = extractf(b, &mantB); + int expC = extractf(c, &mantC); + cl_uint signC = uc.u & kMSB; // We'll need the sign bit of C later to decide + // if we are adding or subtracting -// exact product of A and B + // exact product of A and B int exponent = expA + expB; cl_uint sign = (ua.u ^ 
ub.u) & kMSB; - cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB; + cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB; // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999.. - // The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication - if( 0 == (0x8000000000000000ULL & product) ) + // The MSB might not be set. If so, fix that. Otherwise, reflect the fact + // that we got another power of two from the multiplication + if (0 == (0x8000000000000000ULL & product)) product <<= 1; else - exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased. + exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our + // exponent increased. -//infinite precision add - cl_ulong addend = (cl_ulong) mantC << 32; - if( exponent >= expC ) + // infinite precision add + cl_ulong addend = (cl_ulong)mantC << 32; + if (exponent >= expC) { // Shift C relative to the product so that their exponents match - if( exponent > expC ) - shift_right_sticky_64( &addend, exponent - expC ); + if (exponent > expC) shift_right_sticky_64(&addend, exponent - expC); // Add - if( sign ^ signC ) - sub64( &product, addend, &sign, &exponent ); + if (sign ^ signC) + sub64(&product, addend, &sign, &exponent); else - add64( &product, addend, &exponent ); + add64(&product, addend, &exponent); } else { // Shift the product relative to C so that their exponents match - shift_right_sticky_64( &product, expC - exponent ); + shift_right_sticky_64(&product, expC - exponent); // add - if( sign ^ signC ) - sub64( &addend, product, &signC, &expC ); + if (sign ^ signC) + sub64(&addend, product, &signC, &expC); else - add64( &addend, product, &expC ); + add64(&addend, product, &expC); product = addend; exponent = expC; sign = signC; } - // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c. 
+ // round to IEEE result -- we do not do flushing to zero here. That part is + // handled manually in ternary.c. if (gIsInRTZMode) { - if( shouldFlush ) - ua.d = round_toward_zero_float_ftz( product, exponent); + if (shouldFlush) + ua.d = round_toward_zero_float_ftz(product, exponent); else - ua.d = round_toward_zero_float( product, exponent); + ua.d = round_toward_zero_float(product, exponent); } else { - if( shouldFlush ) - ua.d = round_to_nearest_even_float_ftz( product, exponent); + if (shouldFlush) + ua.d = round_to_nearest_even_float_ftz(product, exponent); else - ua.d = round_to_nearest_even_float( product, exponent); + ua.d = round_to_nearest_even_float(product, exponent); } // Set the sign @@ -579,35 +582,36 @@ float reference_fma( float a, float b, float c, int shouldFlush ) return ua.d; } -double reference_relaxed_exp10( double x) +double reference_relaxed_exp10(double x) { return reference_exp10(x); } + +double reference_exp10(double x) { - return reference_exp10(x); + return reference_exp2(x * HEX_DBL(+, 1, a934f0979a371, +, 1)); } -double reference_exp10( double x) { return reference_exp2( x * HEX_DBL( +, 1, a934f0979a371, +, 1 ) ); } - -int reference_ilogb( double x ) +int reference_ilogb(double x) { extern int gDeviceILogb0, gDeviceILogbNaN; - union { cl_double f; cl_ulong u;} u; + union { + cl_double f; + cl_ulong u; + } u; - u.f = (float) x; - cl_int exponent = (cl_int) (u.u >> 52) & 0x7ff; - if( exponent == 0x7ff ) + u.f = (float)x; + cl_int exponent = (cl_int)(u.u >> 52) & 0x7ff; + if (exponent == 0x7ff) { - if( u.u & 0x000fffffffffffffULL ) - return gDeviceILogbNaN; + if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN; return CL_INT_MAX; } - if( exponent == 0 ) - { // deal with denormals - u.f = x * HEX_DBL( +, 1, 0, +, 64 ); - exponent = (cl_int) (u.u >> 52) & 0x7ff; - if( exponent == 0 ) - return gDeviceILogb0; + if (exponent == 0) + { // deal with denormals + u.f = x * HEX_DBL(+, 1, 0, +, 64); + exponent = (cl_int)(u.u >> 52) & 0x7ff; 
+ if (exponent == 0) return gDeviceILogb0; return exponent - (1023 + 64); } @@ -615,220 +619,205 @@ int reference_ilogb( double x ) return exponent - 1023; } -double reference_nan( cl_uint x ) +double reference_nan(cl_uint x) { - union{ cl_uint u; cl_float f; }u; + union { + cl_uint u; + cl_float f; + } u; u.u = x | 0x7fc00000U; - return (double) u.f; + return (double)u.f; } -double reference_maxmag( double x, double y ) +double reference_maxmag(double x, double y) { double fabsx = fabs(x); double fabsy = fabs(y); - if( fabsx < fabsy ) - return y; + if (fabsx < fabsy) return y; - if( fabsy < fabsx ) - return x; + if (fabsy < fabsx) return x; - return reference_fmax( x, y ); + return reference_fmax(x, y); } -double reference_minmag( double x, double y ) +double reference_minmag(double x, double y) { double fabsx = fabs(x); double fabsy = fabs(y); - if( fabsx > fabsy ) - return y; + if (fabsx > fabsy) return y; - if( fabsy > fabsx ) - return x; + if (fabsy > fabsx) return x; - return reference_fmin( x, y ); + return reference_fmin(x, y); } -//double my_nextafter( double x, double y ){ return (double) nextafterf( (float) x, (float) y ); } - -double reference_relaxed_mad( double a, double b, double c) +double reference_relaxed_mad(double a, double b, double c) { - return ((float) a )* ((float) b) + (float) c; + return ((float)a) * ((float)b) + (float)c; } -double reference_mad( double a, double b, double c ) -{ - return a * b + c; -} +double reference_mad(double a, double b, double c) { return a * b + c; } -double reference_recip( double x) { return 1.0 / x; } -double reference_rootn( double x, int i ) +double reference_recip(double x) { return 1.0 / x; } +double reference_rootn(double x, int i) { - //rootn ( x, 0 ) returns a NaN. - if( 0 == i ) - return cl_make_nan(); + // rootn ( x, 0 ) returns a NaN. + if (0 == i) return cl_make_nan(); - //rootn ( x, n ) returns a NaN for x < 0 and n is even. 
- if( x < 0 && 0 == (i&1) ) - return cl_make_nan(); + // rootn ( x, n ) returns a NaN for x < 0 and n is even. + if (x < 0 && 0 == (i & 1)) return cl_make_nan(); - if( x == 0.0 ) + if (x == 0.0) { - switch( i & 0x80000001 ) + switch (i & 0x80000001) { - //rootn ( +-0, n ) is +0 for even n > 0. - case 0: - return 0.0f; + // rootn ( +-0, n ) is +0 for even n > 0. + case 0: return 0.0f; - //rootn ( +-0, n ) is +-0 for odd n > 0. - case 1: - return x; + // rootn ( +-0, n ) is +-0 for odd n > 0. + case 1: return x; - //rootn ( +-0, n ) is +inf for even n < 0. - case 0x80000000: - return INFINITY; + // rootn ( +-0, n ) is +inf for even n < 0. + case 0x80000000: return INFINITY; - //rootn ( +-0, n ) is +-inf for odd n < 0. - case 0x80000001: - return copysign(INFINITY, x); + // rootn ( +-0, n ) is +-inf for odd n < 0. + case 0x80000001: return copysign(INFINITY, x); } } double sign = x; x = reference_fabs(x); - x = reference_exp2( reference_log2(x) / (double) i ); - return reference_copysignd( x, sign ); + x = reference_exp2(reference_log2(x) / (double)i); + return reference_copysignd(x, sign); } -double reference_rsqrt( double x) { return 1.0 / reference_sqrt(x); } -//double reference_sincos( double x, double *c ){ *c = cos(x); return sin(x); } -double reference_sinpi( double x) +double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); } + +double reference_sinpi(double x) { double r = reduce1(x); // reduce to [-0.5, 0.5] - if( r < -0.5 ) + if (r < -0.5) r = -1 - r; - else if ( r > 0.5 ) + else if (r > 0.5) r = 1 - r; // sinPi zeros have the same sign as x - if( r == 0.0 ) - return reference_copysignd(0.0, x); + if (r == 0.0) return reference_copysignd(0.0, x); - return reference_sin( r * M_PI ); + return reference_sin(r * M_PI); } double reference_relaxed_sinpi(double x) { return reference_sinpi(x); } -double reference_tanpi( double x) +double reference_tanpi(double x) { // set aside the sign (allows us to preserve sign of -0) - double sign = 
reference_copysignd( 1.0, x); + double sign = reference_copysignd(1.0, x); double z = reference_fabs(x); // if big and even -- caution: only works if x only has single precision - if( z >= HEX_DBL( +, 1, 0, +, 24 ) ) + if (z >= HEX_DBL(+, 1, 0, +, 24)) { - if( z == INFINITY ) - return x - x; // nan + if (z == INFINITY) return x - x; // nan - return reference_copysignd( 0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. + return reference_copysignd( + 0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. } // reduce to the range [ -0.5, 0.5 ] - double nearest = reference_rint( z ); // round to nearest even places n + 0.5 values in the right place for us - int i = (int) nearest; // test above against 0x1.0p24 avoids overflow here + double nearest = reference_rint(z); // round to nearest even places n + 0.5 + // values in the right place for us + int i = (int)nearest; // test above against 0x1.0p24 avoids overflow here z -= nearest; - //correction for odd integer x for the right sign of zero - if( (i&1) && z == 0.0 ) - sign = -sign; + // correction for odd integer x for the right sign of zero + if ((i & 1) && z == 0.0) sign = -sign; // track changes to the sign - sign *= reference_copysignd(1.0, z); // really should just be an xor - z = reference_fabs(z); // remove the sign again + sign *= reference_copysignd(1.0, z); // really should just be an xor + z = reference_fabs(z); // remove the sign again // reduce once more - // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly - if( z > 0.25 ) + // If we don't do this, rounding error in z * M_PI will cause us not to + // return infinities properly + if (z > 0.25) { z = 0.5 - z; - return sign / reference_tan( z * M_PI ); // use system tan to get the right result + return sign + / reference_tan(z * M_PI); // use system tan to get the right result } // - return sign * reference_tan( z * M_PI ); // use system tan to get the right result + return sign + * 
reference_tan(z * M_PI); // use system tan to get the right result } -double reference_pown( double x, int i) { return reference_pow( x, (double) i ); } -double reference_powr( double x, double y ) +double reference_pown(double x, int i) { return reference_pow(x, (double)i); } +double reference_powr(double x, double y) { - //powr ( x, y ) returns NaN for x < 0. - if( x < 0.0 ) - return cl_make_nan(); + // powr ( x, y ) returns NaN for x < 0. + if (x < 0.0) return cl_make_nan(); - //powr ( x, NaN ) returns the NaN for x >= 0. - //powr ( NaN, y ) returns the NaN. - if( isnan(x) || isnan(y) ) - return x + y; // Note: behavior different here than for pow(1,NaN), pow(NaN, 0) + // powr ( x, NaN ) returns the NaN for x >= 0. + // powr ( NaN, y ) returns the NaN. + if (isnan(x) || isnan(y)) + return x + y; // Note: behavior different here than for pow(1,NaN), + // pow(NaN, 0) - if( x == 1.0 ) + if (x == 1.0) { - //powr ( +1, +-inf ) returns NaN. - if( reference_fabs(y) == INFINITY ) - return cl_make_nan(); + // powr ( +1, +-inf ) returns NaN. + if (reference_fabs(y) == INFINITY) return cl_make_nan(); - //powr ( +1, y ) is 1 for finite y. (NaN handled above) + // powr ( +1, y ) is 1 for finite y. (NaN handled above) return 1.0; } - if( y == 0.0 ) + if (y == 0.0) { - //powr ( +inf, +-0 ) returns NaN. - //powr ( +-0, +-0 ) returns NaN. - if( x == 0.0 || x == INFINITY ) - return cl_make_nan(); + // powr ( +inf, +-0 ) returns NaN. + // powr ( +-0, +-0 ) returns NaN. + if (x == 0.0 || x == INFINITY) return cl_make_nan(); - //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) + // powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already + // handled above) return 1.0; } - if( x == 0.0 ) + if (x == 0.0) { - //powr ( +-0, -inf) is +inf. - //powr ( +-0, y ) is +inf for finite y < 0. - if( y < 0.0 ) - return INFINITY; + // powr ( +-0, -inf) is +inf. + // powr ( +-0, y ) is +inf for finite y < 0. 
+ if (y < 0.0) return INFINITY; - //powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) + // powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) return 0.0; } // x = +inf - if( isinf(x) ) + if (isinf(x)) { - if( y < 0 ) - return 0; + if (y < 0) return 0; return INFINITY; } double fabsx = reference_fabs(x); double fabsy = reference_fabs(y); - //y = +-inf cases - if( isinf(fabsy) ) + // y = +-inf cases + if (isinf(fabsy)) { - if( y < 0 ) + if (y < 0) { - if( fabsx < 1 ) - return INFINITY; + if (fabsx < 1) return INFINITY; return 0; } - if( fabsx < 1 ) - return 0; + if (fabsx < 1) return 0; return INFINITY; } @@ -840,169 +829,209 @@ double reference_powr( double x, double y ) return result; } -double reference_fract( double x, double *ip ) +double reference_fract(double x, double *ip) { - if(isnan(x)) { + if (isnan(x)) + { *ip = cl_make_nan(); return cl_make_nan(); } float i; - float f = modff((float) x, &i ); - if( f < 0.0 ) + float f = modff((float)x, &i); + if (f < 0.0) { f = 1.0f + f; i -= 1.0f; - if( f == 1.0f ) - f = HEX_FLT( +, 1, fffffe, -, 1 ); + if (f == 1.0f) f = HEX_FLT(+, 1, fffffe, -, 1); } *ip = i; return f; } -//double my_fdim( double x, double y){ return fdimf( (float) x, (float) y ); } -double reference_add( double x, double y ) +double reference_add(double x, double y) { - volatile float a = (float) x; - volatile float b = (float) y; + volatile float a = (float)x; + volatile float b = (float)y; -#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) // defeat x87 - __m128 va = _mm_set_ss( (float) a ); - __m128 vb = _mm_set_ss( (float) b ); - va = _mm_add_ss( va, vb ); - _mm_store_ss( (float*) &a, va ); + __m128 va = _mm_set_ss((float)a); + __m128 vb = _mm_set_ss((float)b); + va = _mm_add_ss(va, vb); + _mm_store_ss((float *)&a, va); #elif defined(__PPC__) - // Most Power host CPUs do not support the non-IEEE mode 
(NI) which flushes denorm's to zero. - // As such, the reference add with FTZ must be emulated in sw. - if (fpu_control & _FPU_MASK_NI) { - union{ cl_uint u; cl_float d; } ua; ua.d = a; - union{ cl_uint u; cl_float d; } ub; ub.d = b; - cl_uint mantA, mantB; - cl_ulong addendA, addendB, sum; - int expA = extractf( a, &mantA ); - int expB = extractf( b, &mantB ); - cl_uint signA = ua.u & 0x80000000U; - cl_uint signB = ub.u & 0x80000000U; - - // Force matching exponents if an operand is 0 - if (a == 0.0f) { - expA = expB; - } else if (b == 0.0f) { - expB = expA; - } - - addendA = (cl_ulong)mantA << 32; - addendB = (cl_ulong)mantB << 32; - - if (expA >= expB) { - // Shift B relative to the A so that their exponents match - if( expA > expB ) - shift_right_sticky_64( &addendB, expA - expB ); + // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes + // denorm's to zero. As such, the reference add with FTZ must be emulated in + // sw. + if (fpu_control & _FPU_MASK_NI) + { + union { + cl_uint u; + cl_float d; + } ua; + ua.d = a; + union { + cl_uint u; + cl_float d; + } ub; + ub.d = b; + cl_uint mantA, mantB; + cl_ulong addendA, addendB, sum; + int expA = extractf(a, &mantA); + int expB = extractf(b, &mantB); + cl_uint signA = ua.u & 0x80000000U; + cl_uint signB = ub.u & 0x80000000U; + + // Force matching exponents if an operand is 0 + if (a == 0.0f) + { + expA = expB; + } + else if (b == 0.0f) + { + expB = expA; + } - // add - if( signA ^ signB ) - sub64( &addendA, addendB, &signA, &expA ); + addendA = (cl_ulong)mantA << 32; + addendB = (cl_ulong)mantB << 32; + + if (expA >= expB) + { + // Shift B relative to the A so that their exponents match + if (expA > expB) shift_right_sticky_64(&addendB, expA - expB); + + // add + if (signA ^ signB) + sub64(&addendA, addendB, &signA, &expA); + else + add64(&addendA, addendB, &expA); + } else - add64( &addendA, addendB, &expA ); - } else { - // Shift the A relative to B so that their exponents match - 
shift_right_sticky_64( &addendA, expB - expA ); + { + // Shift the A relative to B so that their exponents match + shift_right_sticky_64(&addendA, expB - expA); - // add - if( signA ^ signB ) - sub64( &addendB, addendA, &signB, &expB ); + // add + if (signA ^ signB) + sub64(&addendB, addendA, &signB, &expB); + else + add64(&addendB, addendA, &expB); + + addendA = addendB; + expA = expB; + signA = signB; + } + + // round to IEEE result + if (gIsInRTZMode) + { + ua.d = round_toward_zero_float_ftz(addendA, expA); + } else - add64( &addendB, addendA, &expB ); - - addendA = addendB; - expA = expB; - signA = signB; - } - - // round to IEEE result - if (gIsInRTZMode) { - ua.d = round_toward_zero_float_ftz( addendA, expA ); - } else { - ua.d = round_to_nearest_even_float_ftz( addendA, expA ); - } - // Set the sign - ua.u |= signA; - a = ua.d; - } else { - a += b; + { + ua.d = round_to_nearest_even_float_ftz(addendA, expA); + } + // Set the sign + ua.u |= signA; + a = ua.d; + } + else + { + a += b; } #else a += b; #endif - return (double) a; - } + return (double)a; +} -double reference_subtract( double x, double y ) +double reference_subtract(double x, double y) { - volatile float a = (float) x; - volatile float b = (float) y; -#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + volatile float a = (float)x; + volatile float b = (float)y; +#if defined(__SSE__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) // defeat x87 - __m128 va = _mm_set_ss( (float) a ); - __m128 vb = _mm_set_ss( (float) b ); - va = _mm_sub_ss( va, vb ); - _mm_store_ss( (float*) &a, va ); + __m128 va = _mm_set_ss((float)a); + __m128 vb = _mm_set_ss((float)b); + va = _mm_sub_ss(va, vb); + _mm_store_ss((float *)&a, va); #else a -= b; #endif return a; } -//double reference_divide( double x, double y ){ return (float) x / (float) y; } -double reference_multiply( double x, double y) +double reference_multiply(double x, double y) { - volatile float a 
= (float) x; - volatile float b = (float) y; -#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + volatile float a = (float)x; + volatile float b = (float)y; +#if defined(__SSE__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) // defeat x87 - __m128 va = _mm_set_ss( (float) a ); - __m128 vb = _mm_set_ss( (float) b ); - va = _mm_mul_ss( va, vb ); - _mm_store_ss( (float*) &a, va ); + __m128 va = _mm_set_ss((float)a); + __m128 vb = _mm_set_ss((float)b); + va = _mm_mul_ss(va, vb); + _mm_store_ss((float *)&a, va); #elif defined(__PPC__) - // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero. - // As such, the reference multiply with FTZ must be emulated in sw. - if (fpu_control & _FPU_MASK_NI) { - // extract exponent and mantissa - // exponent is a standard unbiased signed integer - // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB - union{ cl_uint u; cl_float d; } ua; ua.d = a; - union{ cl_uint u; cl_float d; } ub; ub.d = b; - cl_uint mantA, mantB; - int expA = extractf( a, &mantA ); - int expB = extractf( b, &mantB ); - - // exact product of A and B - int exponent = expA + expB; - cl_uint sign = (ua.u ^ ub.u) & 0x80000000U; - cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB; - - // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999.. - // The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication - if( 0 == (0x8000000000000000ULL & product) ) - product <<= 1; - else - exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased. - - // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c. 
- if (gIsInRTZMode) { - ua.d = round_toward_zero_float_ftz( product, exponent); - } else { - ua.d = round_to_nearest_even_float_ftz( product, exponent); - } - // Set the sign - ua.u |= sign; - a = ua.d; - } else { - a *= b; + // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes + // denorm's to zero. As such, the reference multiply with FTZ must be + // emulated in sw. + if (fpu_control & _FPU_MASK_NI) + { + // extract exponent and mantissa + // exponent is a standard unbiased signed integer + // mantissa is a cl_uint, with leading non-zero bit positioned at the + // MSB + union { + cl_uint u; + cl_float d; + } ua; + ua.d = a; + union { + cl_uint u; + cl_float d; + } ub; + ub.d = b; + cl_uint mantA, mantB; + int expA = extractf(a, &mantA); + int expB = extractf(b, &mantB); + + // exact product of A and B + int exponent = expA + expB; + cl_uint sign = (ua.u ^ ub.u) & 0x80000000U; + cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB; + + // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999.. + // The MSB might not be set. If so, fix that. Otherwise, reflect the + // fact that we got another power of two from the multiplication + if (0 == (0x8000000000000000ULL & product)) + product <<= 1; + else + exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then + // our exponent increased. + + // round to IEEE result -- we do not do flushing to zero here. That part + // is handled manually in ternary.c. 
+ if (gIsInRTZMode) + { + ua.d = round_toward_zero_float_ftz(product, exponent); + } + else + { + ua.d = round_to_nearest_even_float_ftz(product, exponent); + } + // Set the sign + ua.u |= sign; + a = ua.d; + } + else + { + a *= b; } #else a *= b; @@ -1010,19 +1039,7 @@ double reference_multiply( double x, double y) return a; } -/*double my_remquo( double x, double y, int *iptr ) -{ - if( isnan(x) || isnan(y) || - fabs(x) == INFINITY || - y == 0.0 ) - { - *iptr = 0; - return NAN; - } - - return (double) remquof( (float) x, (float) y, iptr ); -}*/ -double reference_lgamma_r( double x, int *signp ) +double reference_lgamma_r(double x, int *signp) { // This is not currently tested *signp = 0; @@ -1030,266 +1047,276 @@ double reference_lgamma_r( double x, int *signp ) } -int reference_isequal( double x, double y ){ return x == y; } -int reference_isfinite( double x ){ return 0 != isfinite(x); } -int reference_isgreater( double x, double y ){ return x > y; } -int reference_isgreaterequal( double x, double y ){ return x >= y; } -int reference_isinf( double x ){ return 0 != isinf(x); } -int reference_isless( double x, double y ){ return x < y; } -int reference_islessequal( double x, double y ){ return x <= y; } -int reference_islessgreater( double x, double y ){ return 0 != islessgreater( x, y ); } -int reference_isnan( double x ){ return 0 != isnan( x ); } -int reference_isnormal( double x ){ return 0 != isnormal( (float) x ); } -int reference_isnotequal( double x, double y ){ return x != y; } -int reference_isordered( double x, double y){ return x == x && y == y; } -int reference_isunordered( double x, double y ){ return isnan(x) || isnan( y ); } -int reference_signbit( float x ){ return 0 != signbit( x ); } +int reference_isequal(double x, double y) { return x == y; } +int reference_isfinite(double x) { return 0 != isfinite(x); } +int reference_isgreater(double x, double y) { return x > y; } +int reference_isgreaterequal(double x, double y) { return x >= y; } +int 
reference_isinf(double x) { return 0 != isinf(x); } +int reference_isless(double x, double y) { return x < y; } +int reference_islessequal(double x, double y) { return x <= y; } +int reference_islessgreater(double x, double y) +{ + return 0 != islessgreater(x, y); +} +int reference_isnan(double x) { return 0 != isnan(x); } +int reference_isnormal(double x) { return 0 != isnormal((float)x); } +int reference_isnotequal(double x, double y) { return x != y; } +int reference_isordered(double x, double y) { return x == x && y == y; } +int reference_isunordered(double x, double y) { return isnan(x) || isnan(y); } +int reference_signbit(float x) { return 0 != signbit(x); } #if 1 // defined( _MSC_VER ) -//Missing functions for win32 +// Missing functions for win32 -float reference_copysign( float x, float y ) +float reference_copysign(float x, float y) { - union { float f; cl_uint u;} ux, uy; - ux.f = x; uy.f = y; + union { + float f; + cl_uint u; + } ux, uy; + ux.f = x; + uy.f = y; ux.u &= 0x7fffffffU; ux.u |= uy.u & 0x80000000U; return ux.f; } -double reference_copysignd( double x, double y ) +double reference_copysignd(double x, double y) { - union { double f; cl_ulong u;} ux, uy; - ux.f = x; uy.f = y; + union { + double f; + cl_ulong u; + } ux, uy; + ux.f = x; + uy.f = y; ux.u &= 0x7fffffffffffffffULL; ux.u |= uy.u & 0x8000000000000000ULL; return ux.f; } -double reference_round( double x ) +double reference_round(double x) { double absx = reference_fabs(x); - if( absx < 0.5 ) - return reference_copysignd( 0.0, x ); + if (absx < 0.5) return reference_copysignd(0.0, x); - if( absx < HEX_DBL( +, 1, 0, +, 53 ) ) - x = reference_trunc( x + reference_copysignd( 0.5, x ) ); + if (absx < HEX_DBL(+, 1, 0, +, 53)) + x = reference_trunc(x + reference_copysignd(0.5, x)); return x; } -double reference_trunc( double x ) +double reference_trunc(double x) { - if( fabs(x) < HEX_DBL( +, 1, 0, +, 53 ) ) + if (fabs(x) < HEX_DBL(+, 1, 0, +, 53)) { - cl_long l = (cl_long) x; + cl_long l = 
(cl_long)x; - return reference_copysignd( (double) l, x ); + return reference_copysignd((double)l, x); } return x; } #ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN +#define FP_ILOGB0 INT_MIN #endif #ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MAX +#define FP_ILOGBNAN INT_MAX #endif - -double reference_cbrt(double x){ return reference_copysignd( reference_pow( reference_fabs(x), 1.0/3.0 ), x ); } - -/* -double reference_scalbn(double x, int i) -{ // suitable for checking single precision scalbnf only - - if( i > 300 ) - return copysign( INFINITY, x); - if( i < -300 ) - return copysign( 0.0, x); - - union{ cl_ulong u; double d;} u; - u.u = ((cl_ulong) i + 1023) << 52; - - return x * u.d; +double reference_cbrt(double x) +{ + return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x); } -*/ -double reference_rint( double x ) +double reference_rint(double x) { - if( reference_fabs(x) < HEX_DBL( +, 1, 0, +, 52 ) ) + if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52)) { - double magic = reference_copysignd( HEX_DBL( +, 1, 0, +, 52 ), x ); + double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x); double rounded = (x + magic) - magic; - x = reference_copysignd( rounded, x ); + x = reference_copysignd(rounded, x); } return x; } -double reference_acosh( double x ) +double reference_acosh(double x) { // not full precision. Sufficient precision to cover float - if( isnan(x) ) - return x + x; + if (isnan(x)) return x + x; - if( x < 1.0 ) - return cl_make_nan(); + if (x < 1.0) return cl_make_nan(); - return reference_log( x + reference_sqrt(x + 1) * reference_sqrt(x-1) ); + return reference_log(x + reference_sqrt(x + 1) * reference_sqrt(x - 1)); } -double reference_asinh( double x ) +double reference_asinh(double x) { -/* - * ==================================================== - * This function is from fdlibm: http://www.netlib.org - * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
- * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - if( isnan(x) || isinf(x) ) - return x + x; + /* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + if (isnan(x) || isinf(x)) return x + x; double absx = reference_fabs(x); - if( absx < HEX_DBL( +, 1, 0, -, 28 ) ) - return x; + if (absx < HEX_DBL(+, 1, 0, -, 28)) return x; double sign = reference_copysignd(1.0, x); - if( absx > HEX_DBL( +, 1, 0, +, 28 ) ) - return sign * (reference_log( absx ) + 0.693147180559945309417232121458176568); // log(2) + if (absx > HEX_DBL(+, 1, 0, +, 28)) + return sign + * (reference_log(absx) + + 0.693147180559945309417232121458176568); // log(2) - if( absx > 2.0 ) - return sign * reference_log( 2.0 * absx + 1.0 / (reference_sqrt( x * x + 1.0 ) + absx)); + if (absx > 2.0) + return sign + * reference_log(2.0 * absx + + 1.0 / (reference_sqrt(x * x + 1.0) + absx)); - return sign * reference_log1p( absx + x*x / (1.0 + reference_sqrt(1.0 + x*x))); + return sign + * reference_log1p(absx + x * x / (1.0 + reference_sqrt(1.0 + x * x))); } -double reference_atanh( double x ) +double reference_atanh(double x) { -/* - * ==================================================== - * This function is from fdlibm: http://www.netlib.org - * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. 
- * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - if( isnan(x) ) - return x + x; + /* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + if (isnan(x)) return x + x; - double signed_half = reference_copysignd( 0.5, x ); + double signed_half = reference_copysignd(0.5, x); x = reference_fabs(x); - if( x > 1.0 ) - return cl_make_nan(); + if (x > 1.0) return cl_make_nan(); - if( x < 0.5 ) - return signed_half * reference_log1p( 2.0 * ( x + x*x / (1-x) ) ); + if (x < 0.5) + return signed_half * reference_log1p(2.0 * (x + x * x / (1 - x))); - return signed_half * reference_log1p(2.0 * x / (1-x)); + return signed_half * reference_log1p(2.0 * x / (1 - x)); } double reference_relaxed_atan(double x) { return reference_atan(x); } -double reference_relaxed_exp2( double x ) -{ - return reference_exp2(x); -} +double reference_relaxed_exp2(double x) { return reference_exp2(x); } -double reference_exp2( double x ) -{ // Note: only suitable for verifying single precision. Doesn't have range of a full double exp2 implementation. - if( x == 0.0 ) - return 1.0; +double reference_exp2(double x) +{ // Note: only suitable for verifying single precision. Doesn't have range of a + // full double exp2 implementation. 
+ if (x == 0.0) return 1.0; // separate x into fractional and integer parts - double i = reference_rint( x ); // round to nearest integer + double i = reference_rint(x); // round to nearest integer - if( i < -150 ) - return 0.0; + if (i < -150) return 0.0; - if( i > 129 ) - return INFINITY; + if (i > 129) return INFINITY; - double f = x - i; // -0.5 <= f <= 0.5 + double f = x - i; // -0.5 <= f <= 0.5 // find exp2(f) // calculate as p(f) = (exp2(f)-1)/f // exp2(f) = f * p(f) + 1 // p(f) is a minimax polynomial with error within 0x1.c1fd80f0d1ab7p-50 - double p = 0.693147180560184539289 + - (0.240226506955902863183 + - (0.055504108656833424373 + - (0.009618129212846484796 + - (0.001333355902958566035 + - (0.000154034191902497930 + - (0.000015252317761038105 + - (0.000001326283129417092 + 0.000000102593187638680 * f)*f)*f)*f)*f)*f)*f)*f; + double p = 0.693147180560184539289 + + (0.240226506955902863183 + + (0.055504108656833424373 + + (0.009618129212846484796 + + (0.001333355902958566035 + + (0.000154034191902497930 + + (0.000015252317761038105 + + (0.000001326283129417092 + + 0.000000102593187638680 * f) + * f) + * f) + * f) + * f) + * f) + * f) + * f; f *= p; f += 1.0; // scale by 2 ** i - union{ cl_ulong u; double d; } u; - int exponent = (int) i + 1023; - u.u = (cl_ulong) exponent << 52; + union { + cl_ulong u; + double d; + } u; + int exponent = (int)i + 1023; + u.u = (cl_ulong)exponent << 52; return f * u.d; } -double reference_expm1( double x ) -{ // Note: only suitable for verifying single precision. Doesn't have range of a full double expm1 implementation. It is only accurate to 47 bits or less. +double reference_expm1(double x) +{ // Note: only suitable for verifying single precision. Doesn't have range of a + // full double expm1 implementation. It is only accurate to 47 bits or less. // early out for small numbers and NaNs - if( ! 
(reference_fabs(x) > HEX_DBL( +, 1, 0, -, 24 )) ) - return x; + if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 24))) return x; // early out for large negative numbers - if( x < -130.0 ) - return -1.0; + if (x < -130.0) return -1.0; // early out for large positive numbers - if( x > 100.0 ) - return INFINITY; + if (x > 100.0) return INFINITY; // separate x into fractional and integer parts - double i = reference_rint( x ); // round to nearest integer - double f = x - i; // -0.5 <= f <= 0.5 + double i = reference_rint(x); // round to nearest integer + double f = x - i; // -0.5 <= f <= 0.5 // reduce f to the range -0.0625 .. f.. 0.0625 - int index = (int) (f * 16.0) + 8; // 0...16 + int index = (int)(f * 16.0) + 8; // 0...16 - static const double reduction[17] = { -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625, - 0.0, - +0.0625, +0.125, +0.1875, +0.25, +0.3125, +0.375, +0.4375, +0.5 }; + static const double reduction[17] = { -0.5, -0.4375, -0.375, -0.3125, + -0.25, -0.1875, -0.125, -0.0625, + 0.0, +0.0625, +0.125, +0.1875, + +0.25, +0.3125, +0.375, +0.4375, + +0.5 }; // exponentials[i] = expm1(reduction[i]) - static const double exponentials[17] = { HEX_DBL( -, 1, 92e9a0720d3ec, -, 2 ), HEX_DBL( -, 1, 6adb1cd9205ee, -, 2 ), - HEX_DBL( -, 1, 40373d42ce2e3, -, 2 ), HEX_DBL( -, 1, 12d35a41ba104, -, 2 ), - HEX_DBL( -, 1, c5041854df7d4, -, 3 ), HEX_DBL( -, 1, 5e25fb4fde211, -, 3 ), - HEX_DBL( -, 1, e14aed893eef4, -, 4 ), HEX_DBL( -, 1, f0540438fd5c3, -, 5 ), - HEX_DBL( +, 0, 0, +, 0 ), - HEX_DBL( +, 1, 082b577d34ed8, -, 4 ), HEX_DBL( +, 1, 10b022db7ae68, -, 3 ), - HEX_DBL( +, 1, a65c0b85ac1a9, -, 3 ), HEX_DBL( +, 1, 22d78f0fa061a, -, 2 ), - HEX_DBL( +, 1, 77a45d8117fd5, -, 2 ), HEX_DBL( +, 1, d1e944f6fbdaa, -, 2 ), - HEX_DBL( +, 1, 190048ef6002, -, 1 ), HEX_DBL( +, 1, 4c2531c3c0d38, -, 1 ), - }; + static const double exponentials[17] = { + HEX_DBL(-, 1, 92e9a0720d3ec, -, 2), + HEX_DBL(-, 1, 6adb1cd9205ee, -, 2), + HEX_DBL(-, 1, 40373d42ce2e3, -, 2), + 
HEX_DBL(-, 1, 12d35a41ba104, -, 2), + HEX_DBL(-, 1, c5041854df7d4, -, 3), + HEX_DBL(-, 1, 5e25fb4fde211, -, 3), + HEX_DBL(-, 1, e14aed893eef4, -, 4), + HEX_DBL(-, 1, f0540438fd5c3, -, 5), + HEX_DBL(+, 0, 0, +, 0), + HEX_DBL(+, 1, 082b577d34ed8, -, 4), + HEX_DBL(+, 1, 10b022db7ae68, -, 3), + HEX_DBL(+, 1, a65c0b85ac1a9, -, 3), + HEX_DBL(+, 1, 22d78f0fa061a, -, 2), + HEX_DBL(+, 1, 77a45d8117fd5, -, 2), + HEX_DBL(+, 1, d1e944f6fbdaa, -, 2), + HEX_DBL(+, 1, 190048ef6002, -, 1), + HEX_DBL(+, 1, 4c2531c3c0d38, -, 1), + }; f -= reduction[index]; @@ -1297,223 +1324,368 @@ double reference_expm1( double x ) // find expm1(f) // calculate as p(f) = (exp(f)-1)/f // expm1(f) = f * p(f) - // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over the range +- 0.0625 - double p = 0.999999999999998001599 + - (0.499999999999839628284 + - (0.166666666672817459505 + - (0.041666666612283048687 + - (0.008333330214567431435 + - (0.001389005319303770070 + 0.000198833381525156667 * f)*f)*f)*f)*f)*f; + // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over + // the range +- 0.0625 + double p = 0.999999999999998001599 + + (0.499999999999839628284 + + (0.166666666672817459505 + + (0.041666666612283048687 + + (0.008333330214567431435 + + (0.001389005319303770070 + 0.000198833381525156667 * f) + * f) + * f) + * f) + * f) + * f; f *= p; // expm1( reduced f ) // expm1(f) = (exmp1( reduced_f) + 1.0) * ( exponentials[index] + 1 ) - 1 - // = exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] + 1 -1 - // = exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] - f += exponentials[index] + f * exponentials[index]; + // = exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + + // exponentials[index] + 1 -1 = exmp1( reduced_f) * + // exponentials[index] + exmp1( reduced_f) + exponentials[index] + f += exponentials[index] + f * exponentials[index]; // scale by e ** i - int exponent = (int) i; - if( 
0 == exponent ) - return f; // precise answer for x near 1 + int exponent = (int)i; + if (0 == exponent) return f; // precise answer for x near 1 // table of e**(i-150) - static const double exp_table[128+150+1] = - { - HEX_DBL( +, 1, 82e16284f5ec5, -, 217 ), HEX_DBL( +, 1, 06e9996332ba1, -, 215 ), - HEX_DBL( +, 1, 6555cb289e44b, -, 214 ), HEX_DBL( +, 1, e5ab364643354, -, 213 ), - HEX_DBL( +, 1, 4a0bd18e64df7, -, 211 ), HEX_DBL( +, 1, c094499cc578e, -, 210 ), - HEX_DBL( +, 1, 30d759323998c, -, 208 ), HEX_DBL( +, 1, 9e5278ab1d4cf, -, 207 ), - HEX_DBL( +, 1, 198fa3f30be25, -, 205 ), HEX_DBL( +, 1, 7eae636d6144e, -, 204 ), - HEX_DBL( +, 1, 040f1036f4863, -, 202 ), HEX_DBL( +, 1, 6174e477a895f, -, 201 ), - HEX_DBL( +, 1, e065b82dd95a, -, 200 ), HEX_DBL( +, 1, 4676be491d129, -, 198 ), - HEX_DBL( +, 1, bbb5da5f7c823, -, 197 ), HEX_DBL( +, 1, 2d884eef5fdcb, -, 195 ), - HEX_DBL( +, 1, 99d3397ab8371, -, 194 ), HEX_DBL( +, 1, 1681497ed15b3, -, 192 ), - HEX_DBL( +, 1, 7a870f597fdbd, -, 191 ), HEX_DBL( +, 1, 013c74edba307, -, 189 ), - HEX_DBL( +, 1, 5d9ec4ada7938, -, 188 ), HEX_DBL( +, 1, db2edfd20fa7c, -, 187 ), - HEX_DBL( +, 1, 42eb9f39afb0b, -, 185 ), HEX_DBL( +, 1, b6e4f282b43f4, -, 184 ), - HEX_DBL( +, 1, 2a42764857b19, -, 182 ), HEX_DBL( +, 1, 9560792d19314, -, 181 ), - HEX_DBL( +, 1, 137b6ce8e052c, -, 179 ), HEX_DBL( +, 1, 766b45dd84f18, -, 178 ), - HEX_DBL( +, 1, fce362fe6e7d, -, 177 ), HEX_DBL( +, 1, 59d34dd8a5473, -, 175 ), - HEX_DBL( +, 1, d606847fc727a, -, 174 ), HEX_DBL( +, 1, 3f6a58b795de3, -, 172 ), - HEX_DBL( +, 1, b2216c6efdac1, -, 171 ), HEX_DBL( +, 1, 2705b5b153fb8, -, 169 ), - HEX_DBL( +, 1, 90fa1509bd50d, -, 168 ), HEX_DBL( +, 1, 107df698da211, -, 166 ), - HEX_DBL( +, 1, 725ae6e7b9d35, -, 165 ), HEX_DBL( +, 1, f75d6040aeff6, -, 164 ), - HEX_DBL( +, 1, 56126259e093c, -, 162 ), HEX_DBL( +, 1, d0ec7df4f7bd4, -, 161 ), - HEX_DBL( +, 1, 3bf2cf6722e46, -, 159 ), HEX_DBL( +, 1, ad6b22f55db42, -, 158 ), - HEX_DBL( +, 1, 23d1f3e5834a, -, 156 ), HEX_DBL( +, 1, 
8c9feab89b876, -, 155 ), - HEX_DBL( +, 1, 0d88cf37f00dd, -, 153 ), HEX_DBL( +, 1, 6e55d2bf838a7, -, 152 ), - HEX_DBL( +, 1, f1e6b68529e33, -, 151 ), HEX_DBL( +, 1, 525be4e4e601d, -, 149 ), - HEX_DBL( +, 1, cbe0a45f75eb1, -, 148 ), HEX_DBL( +, 1, 3884e838aea68, -, 146 ), - HEX_DBL( +, 1, a8c1f14e2af5d, -, 145 ), HEX_DBL( +, 1, 20a717e64a9bd, -, 143 ), - HEX_DBL( +, 1, 8851d84118908, -, 142 ), HEX_DBL( +, 1, 0a9bdfb02d24, -, 140 ), - HEX_DBL( +, 1, 6a5bea046b42e, -, 139 ), HEX_DBL( +, 1, ec7f3b269efa8, -, 138 ), - HEX_DBL( +, 1, 4eafb87eab0f2, -, 136 ), HEX_DBL( +, 1, c6e2d05bbc, -, 135 ), - HEX_DBL( +, 1, 35208867c2683, -, 133 ), HEX_DBL( +, 1, a425b317eeacd, -, 132 ), - HEX_DBL( +, 1, 1d8508fa8246a, -, 130 ), HEX_DBL( +, 1, 840fbc08fdc8a, -, 129 ), - HEX_DBL( +, 1, 07b7112bc1ffe, -, 127 ), HEX_DBL( +, 1, 666d0dad2961d, -, 126 ), - HEX_DBL( +, 1, e726c3f64d0fe, -, 125 ), HEX_DBL( +, 1, 4b0dc07cabf98, -, 123 ), - HEX_DBL( +, 1, c1f2daf3b6a46, -, 122 ), HEX_DBL( +, 1, 31c5957a47de2, -, 120 ), - HEX_DBL( +, 1, 9f96445648b9f, -, 119 ), HEX_DBL( +, 1, 1a6baeadb4fd1, -, 117 ), - HEX_DBL( +, 1, 7fd974d372e45, -, 116 ), HEX_DBL( +, 1, 04da4d1452919, -, 114 ), - HEX_DBL( +, 1, 62891f06b345, -, 113 ), HEX_DBL( +, 1, e1dd273aa8a4a, -, 112 ), - HEX_DBL( +, 1, 4775e0840bfdd, -, 110 ), HEX_DBL( +, 1, bd109d9d94bda, -, 109 ), - HEX_DBL( +, 1, 2e73f53fba844, -, 107 ), HEX_DBL( +, 1, 9b138170d6bfe, -, 106 ), - HEX_DBL( +, 1, 175af0cf60ec5, -, 104 ), HEX_DBL( +, 1, 7baee1bffa80b, -, 103 ), - HEX_DBL( +, 1, 02057d1245ceb, -, 101 ), HEX_DBL( +, 1, 5eafffb34ba31, -, 100 ), - HEX_DBL( +, 1, dca23bae16424, -, 99 ), HEX_DBL( +, 1, 43e7fc88b8056, -, 97 ), - HEX_DBL( +, 1, b83bf23a9a9eb, -, 96 ), HEX_DBL( +, 1, 2b2b8dd05b318, -, 94 ), - HEX_DBL( +, 1, 969d47321e4cc, -, 93 ), HEX_DBL( +, 1, 1452b7723aed2, -, 91 ), - HEX_DBL( +, 1, 778fe2497184c, -, 90 ), HEX_DBL( +, 1, fe7116182e9cc, -, 89 ), - HEX_DBL( +, 1, 5ae191a99585a, -, 87 ), HEX_DBL( +, 1, d775d87da854d, -, 86 ), - HEX_DBL( +, 1, 
4063f8cc8bb98, -, 84 ), HEX_DBL( +, 1, b374b315f87c1, -, 83 ), - HEX_DBL( +, 1, 27ec458c65e3c, -, 81 ), HEX_DBL( +, 1, 923372c67a074, -, 80 ), - HEX_DBL( +, 1, 1152eaeb73c08, -, 78 ), HEX_DBL( +, 1, 737c5645114b5, -, 77 ), - HEX_DBL( +, 1, f8e6c24b5592e, -, 76 ), HEX_DBL( +, 1, 571db733a9d61, -, 74 ), - HEX_DBL( +, 1, d257d547e083f, -, 73 ), HEX_DBL( +, 1, 3ce9b9de78f85, -, 71 ), - HEX_DBL( +, 1, aebabae3a41b5, -, 70 ), HEX_DBL( +, 1, 24b6031b49bda, -, 68 ), - HEX_DBL( +, 1, 8dd5e1bb09d7e, -, 67 ), HEX_DBL( +, 1, 0e5b73d1ff53d, -, 65 ), - HEX_DBL( +, 1, 6f741de1748ec, -, 64 ), HEX_DBL( +, 1, f36bd37f42f3e, -, 63 ), - HEX_DBL( +, 1, 536452ee2f75c, -, 61 ), HEX_DBL( +, 1, cd480a1b7482, -, 60 ), - HEX_DBL( +, 1, 39792499b1a24, -, 58 ), HEX_DBL( +, 1, aa0de4bf35b38, -, 57 ), - HEX_DBL( +, 1, 2188ad6ae3303, -, 55 ), HEX_DBL( +, 1, 898471fca6055, -, 54 ), - HEX_DBL( +, 1, 0b6c3afdde064, -, 52 ), HEX_DBL( +, 1, 6b7719a59f0e, -, 51 ), - HEX_DBL( +, 1, ee001eed62aa, -, 50 ), HEX_DBL( +, 1, 4fb547c775da8, -, 48 ), - HEX_DBL( +, 1, c8464f7616468, -, 47 ), HEX_DBL( +, 1, 36121e24d3bba, -, 45 ), - HEX_DBL( +, 1, a56e0c2ac7f75, -, 44 ), HEX_DBL( +, 1, 1e642baeb84a, -, 42 ), - HEX_DBL( +, 1, 853f01d6d53ba, -, 41 ), HEX_DBL( +, 1, 0885298767e9a, -, 39 ), - HEX_DBL( +, 1, 67852a7007e42, -, 38 ), HEX_DBL( +, 1, e8a37a45fc32e, -, 37 ), - HEX_DBL( +, 1, 4c1078fe9228a, -, 35 ), HEX_DBL( +, 1, c3527e433fab1, -, 34 ), - HEX_DBL( +, 1, 32b48bf117da2, -, 32 ), HEX_DBL( +, 1, a0db0d0ddb3ec, -, 31 ), - HEX_DBL( +, 1, 1b48655f37267, -, 29 ), HEX_DBL( +, 1, 81056ff2c5772, -, 28 ), - HEX_DBL( +, 1, 05a628c699fa1, -, 26 ), HEX_DBL( +, 1, 639e3175a689d, -, 25 ), - HEX_DBL( +, 1, e355bbaee85cb, -, 24 ), HEX_DBL( +, 1, 4875ca227ec38, -, 22 ), - HEX_DBL( +, 1, be6c6fdb01612, -, 21 ), HEX_DBL( +, 1, 2f6053b981d98, -, 19 ), - HEX_DBL( +, 1, 9c54c3b43bc8b, -, 18 ), HEX_DBL( +, 1, 18354238f6764, -, 16 ), - HEX_DBL( +, 1, 7cd79b5647c9b, -, 15 ), HEX_DBL( +, 1, 02cf22526545a, -, 13 ), - HEX_DBL( +, 1, 
5fc21041027ad, -, 12 ), HEX_DBL( +, 1, de16b9c24a98f, -, 11 ), - HEX_DBL( +, 1, 44e51f113d4d6, -, 9 ), HEX_DBL( +, 1, b993fe00d5376, -, 8 ), - HEX_DBL( +, 1, 2c155b8213cf4, -, 6 ), HEX_DBL( +, 1, 97db0ccceb0af, -, 5 ), - HEX_DBL( +, 1, 152aaa3bf81cc, -, 3 ), HEX_DBL( +, 1, 78b56362cef38, -, 2 ), - HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 1, 5bf0a8b145769, +, 1 ), - HEX_DBL( +, 1, d8e64b8d4ddae, +, 2 ), HEX_DBL( +, 1, 415e5bf6fb106, +, 4 ), - HEX_DBL( +, 1, b4c902e273a58, +, 5 ), HEX_DBL( +, 1, 28d389970338f, +, 7 ), - HEX_DBL( +, 1, 936dc5690c08f, +, 8 ), HEX_DBL( +, 1, 122885aaeddaa, +, 10 ), - HEX_DBL( +, 1, 749ea7d470c6e, +, 11 ), HEX_DBL( +, 1, fa7157c470f82, +, 12 ), - HEX_DBL( +, 1, 5829dcf95056, +, 14 ), HEX_DBL( +, 1, d3c4488ee4f7f, +, 15 ), - HEX_DBL( +, 1, 3de1654d37c9a, +, 17 ), HEX_DBL( +, 1, b00b5916ac955, +, 18 ), - HEX_DBL( +, 1, 259ac48bf05d7, +, 20 ), HEX_DBL( +, 1, 8f0ccafad2a87, +, 21 ), - HEX_DBL( +, 1, 0f2ebd0a8002, +, 23 ), HEX_DBL( +, 1, 709348c0ea4f9, +, 24 ), - HEX_DBL( +, 1, f4f22091940bd, +, 25 ), HEX_DBL( +, 1, 546d8f9ed26e1, +, 27 ), - HEX_DBL( +, 1, ceb088b68e804, +, 28 ), HEX_DBL( +, 1, 3a6e1fd9eecfd, +, 30 ), - HEX_DBL( +, 1, ab5adb9c436, +, 31 ), HEX_DBL( +, 1, 226af33b1fdc1, +, 33 ), - HEX_DBL( +, 1, 8ab7fb5475fb7, +, 34 ), HEX_DBL( +, 1, 0c3d3920962c9, +, 36 ), - HEX_DBL( +, 1, 6c932696a6b5d, +, 37 ), HEX_DBL( +, 1, ef822f7f6731d, +, 38 ), - HEX_DBL( +, 1, 50bba3796379a, +, 40 ), HEX_DBL( +, 1, c9aae4631c056, +, 41 ), - HEX_DBL( +, 1, 370470aec28ed, +, 43 ), HEX_DBL( +, 1, a6b765d8cdf6d, +, 44 ), - HEX_DBL( +, 1, 1f43fcc4b662c, +, 46 ), HEX_DBL( +, 1, 866f34a725782, +, 47 ), - HEX_DBL( +, 1, 0953e2f3a1ef7, +, 49 ), HEX_DBL( +, 1, 689e221bc8d5b, +, 50 ), - HEX_DBL( +, 1, ea215a1d20d76, +, 51 ), HEX_DBL( +, 1, 4d13fbb1a001a, +, 53 ), - HEX_DBL( +, 1, c4b334617cc67, +, 54 ), HEX_DBL( +, 1, 33a43d282a519, +, 56 ), - HEX_DBL( +, 1, a220d397972eb, +, 57 ), HEX_DBL( +, 1, 1c25c88df6862, +, 59 ), - HEX_DBL( +, 1, 8232558201159, +, 60 ), 
HEX_DBL( +, 1, 0672a3c9eb871, +, 62 ), - HEX_DBL( +, 1, 64b41c6d37832, +, 63 ), HEX_DBL( +, 1, e4cf766fe49be, +, 64 ), - HEX_DBL( +, 1, 49767bc0483e3, +, 66 ), HEX_DBL( +, 1, bfc951eb8bb76, +, 67 ), - HEX_DBL( +, 1, 304d6aeca254b, +, 69 ), HEX_DBL( +, 1, 9d97010884251, +, 70 ), - HEX_DBL( +, 1, 19103e4080b45, +, 72 ), HEX_DBL( +, 1, 7e013cd114461, +, 73 ), - HEX_DBL( +, 1, 03996528e074c, +, 75 ), HEX_DBL( +, 1, 60d4f6fdac731, +, 76 ), - HEX_DBL( +, 1, df8c5af17ba3b, +, 77 ), HEX_DBL( +, 1, 45e3076d61699, +, 79 ), - HEX_DBL( +, 1, baed16a6e0da7, +, 80 ), HEX_DBL( +, 1, 2cffdfebde1a1, +, 82 ), - HEX_DBL( +, 1, 9919cabefcb69, +, 83 ), HEX_DBL( +, 1, 160345c9953e3, +, 85 ), - HEX_DBL( +, 1, 79dbc9dc53c66, +, 86 ), HEX_DBL( +, 1, 00c810d464097, +, 88 ), - HEX_DBL( +, 1, 5d009394c5c27, +, 89 ), HEX_DBL( +, 1, da57de8f107a8, +, 90 ), - HEX_DBL( +, 1, 425982cf597cd, +, 92 ), HEX_DBL( +, 1, b61e5ca3a5e31, +, 93 ), - HEX_DBL( +, 1, 29bb825dfcf87, +, 95 ), HEX_DBL( +, 1, 94a90db0d6fe2, +, 96 ), - HEX_DBL( +, 1, 12fec759586fd, +, 98 ), HEX_DBL( +, 1, 75c1dc469e3af, +, 99 ), - HEX_DBL( +, 1, fbfd219c43b04, +, 100 ), HEX_DBL( +, 1, 5936d44e1a146, +, 102 ), - HEX_DBL( +, 1, d531d8a7ee79c, +, 103 ), HEX_DBL( +, 1, 3ed9d24a2d51b, +, 105 ), - HEX_DBL( +, 1, b15cfe5b6e17b, +, 106 ), HEX_DBL( +, 1, 268038c2c0e, +, 108 ), - HEX_DBL( +, 1, 9044a73545d48, +, 109 ), HEX_DBL( +, 1, 1002ab6218b38, +, 111 ), - HEX_DBL( +, 1, 71b3540cbf921, +, 112 ), HEX_DBL( +, 1, f6799ea9c414a, +, 113 ), - HEX_DBL( +, 1, 55779b984f3eb, +, 115 ), HEX_DBL( +, 1, d01a210c44aa4, +, 116 ), - HEX_DBL( +, 1, 3b63da8e9121, +, 118 ), HEX_DBL( +, 1, aca8d6b0116b8, +, 119 ), - HEX_DBL( +, 1, 234de9e0c74e9, +, 121 ), HEX_DBL( +, 1, 8bec7503ca477, +, 122 ), - HEX_DBL( +, 1, 0d0eda9796b9, +, 124 ), HEX_DBL( +, 1, 6db0118477245, +, 125 ), - HEX_DBL( +, 1, f1056dc7bf22d, +, 126 ), HEX_DBL( +, 1, 51c2cc3433801, +, 128 ), - HEX_DBL( +, 1, cb108ffbec164, +, 129 ), HEX_DBL( +, 1, 37f780991b584, +, 131 ), - HEX_DBL( +, 1, 
a801c0ea8ac4d, +, 132 ), HEX_DBL( +, 1, 20247cc4c46c1, +, 134 ), - HEX_DBL( +, 1, 87a0553328015, +, 135 ), HEX_DBL( +, 1, 0a233dee4f9bb, +, 137 ), - HEX_DBL( +, 1, 69b7f55b808ba, +, 138 ), HEX_DBL( +, 1, eba064644060a, +, 139 ), - HEX_DBL( +, 1, 4e184933d9364, +, 141 ), HEX_DBL( +, 1, c614fe2531841, +, 142 ), - HEX_DBL( +, 1, 3494a9b171bf5, +, 144 ), HEX_DBL( +, 1, a36798b9d969b, +, 145 ), - HEX_DBL( +, 1, 1d03d8c0c04af, +, 147 ), HEX_DBL( +, 1, 836026385c974, +, 148 ), - HEX_DBL( +, 1, 073fbe9ac901d, +, 150 ), HEX_DBL( +, 1, 65cae0969f286, +, 151 ), - HEX_DBL( +, 1, e64a58639cae8, +, 152 ), HEX_DBL( +, 1, 4a77f5f9b50f9, +, 154 ), - HEX_DBL( +, 1, c12744a3a28e3, +, 155 ), HEX_DBL( +, 1, 313b3b6978e85, +, 157 ), - HEX_DBL( +, 1, 9eda3a31e587e, +, 158 ), HEX_DBL( +, 1, 19ebe56b56453, +, 160 ), - HEX_DBL( +, 1, 7f2bc6e599b7e, +, 161 ), HEX_DBL( +, 1, 04644610df2ff, +, 163 ), - HEX_DBL( +, 1, 61e8b490ac4e6, +, 164 ), HEX_DBL( +, 1, e103201f299b3, +, 165 ), - HEX_DBL( +, 1, 46e1b637beaf5, +, 167 ), HEX_DBL( +, 1, bc473cfede104, +, 168 ), - HEX_DBL( +, 1, 2deb1b9c85e2d, +, 170 ), HEX_DBL( +, 1, 9a5981ca67d1, +, 171 ), - HEX_DBL( +, 1, 16dc8a9ef670b, +, 173 ), HEX_DBL( +, 1, 7b03166942309, +, 174 ), - HEX_DBL( +, 1, 0190be03150a7, +, 176 ), HEX_DBL( +, 1, 5e1152f9a8119, +, 177 ), - HEX_DBL( +, 1, dbca9263f8487, +, 178 ), HEX_DBL( +, 1, 43556dee93bee, +, 180 ), - HEX_DBL( +, 1, b774c12967dfa, +, 181 ), HEX_DBL( +, 1, 2aa4306e922c2, +, 183 ), - HEX_DBL( +, 1, 95e54c5dd4217, +, 184 ) }; - - // scale by e**i -- (expm1(f) + 1)*e**i - 1 = expm1(f) * e**i + e**i - 1 = e**i - return exp_table[exponent+150] + (f * exp_table[exponent+150] - 1.0); -} - - -double reference_fmax( double x, double y ) -{ - if( isnan(y) ) - return x; + static const double exp_table[128 + 150 + 1] = { + HEX_DBL(+, 1, 82e16284f5ec5, -, 217), + HEX_DBL(+, 1, 06e9996332ba1, -, 215), + HEX_DBL(+, 1, 6555cb289e44b, -, 214), + HEX_DBL(+, 1, e5ab364643354, -, 213), + HEX_DBL(+, 1, 4a0bd18e64df7, -, 211), + 
HEX_DBL(+, 1, c094499cc578e, -, 210), + HEX_DBL(+, 1, 30d759323998c, -, 208), + HEX_DBL(+, 1, 9e5278ab1d4cf, -, 207), + HEX_DBL(+, 1, 198fa3f30be25, -, 205), + HEX_DBL(+, 1, 7eae636d6144e, -, 204), + HEX_DBL(+, 1, 040f1036f4863, -, 202), + HEX_DBL(+, 1, 6174e477a895f, -, 201), + HEX_DBL(+, 1, e065b82dd95a, -, 200), + HEX_DBL(+, 1, 4676be491d129, -, 198), + HEX_DBL(+, 1, bbb5da5f7c823, -, 197), + HEX_DBL(+, 1, 2d884eef5fdcb, -, 195), + HEX_DBL(+, 1, 99d3397ab8371, -, 194), + HEX_DBL(+, 1, 1681497ed15b3, -, 192), + HEX_DBL(+, 1, 7a870f597fdbd, -, 191), + HEX_DBL(+, 1, 013c74edba307, -, 189), + HEX_DBL(+, 1, 5d9ec4ada7938, -, 188), + HEX_DBL(+, 1, db2edfd20fa7c, -, 187), + HEX_DBL(+, 1, 42eb9f39afb0b, -, 185), + HEX_DBL(+, 1, b6e4f282b43f4, -, 184), + HEX_DBL(+, 1, 2a42764857b19, -, 182), + HEX_DBL(+, 1, 9560792d19314, -, 181), + HEX_DBL(+, 1, 137b6ce8e052c, -, 179), + HEX_DBL(+, 1, 766b45dd84f18, -, 178), + HEX_DBL(+, 1, fce362fe6e7d, -, 177), + HEX_DBL(+, 1, 59d34dd8a5473, -, 175), + HEX_DBL(+, 1, d606847fc727a, -, 174), + HEX_DBL(+, 1, 3f6a58b795de3, -, 172), + HEX_DBL(+, 1, b2216c6efdac1, -, 171), + HEX_DBL(+, 1, 2705b5b153fb8, -, 169), + HEX_DBL(+, 1, 90fa1509bd50d, -, 168), + HEX_DBL(+, 1, 107df698da211, -, 166), + HEX_DBL(+, 1, 725ae6e7b9d35, -, 165), + HEX_DBL(+, 1, f75d6040aeff6, -, 164), + HEX_DBL(+, 1, 56126259e093c, -, 162), + HEX_DBL(+, 1, d0ec7df4f7bd4, -, 161), + HEX_DBL(+, 1, 3bf2cf6722e46, -, 159), + HEX_DBL(+, 1, ad6b22f55db42, -, 158), + HEX_DBL(+, 1, 23d1f3e5834a, -, 156), + HEX_DBL(+, 1, 8c9feab89b876, -, 155), + HEX_DBL(+, 1, 0d88cf37f00dd, -, 153), + HEX_DBL(+, 1, 6e55d2bf838a7, -, 152), + HEX_DBL(+, 1, f1e6b68529e33, -, 151), + HEX_DBL(+, 1, 525be4e4e601d, -, 149), + HEX_DBL(+, 1, cbe0a45f75eb1, -, 148), + HEX_DBL(+, 1, 3884e838aea68, -, 146), + HEX_DBL(+, 1, a8c1f14e2af5d, -, 145), + HEX_DBL(+, 1, 20a717e64a9bd, -, 143), + HEX_DBL(+, 1, 8851d84118908, -, 142), + HEX_DBL(+, 1, 0a9bdfb02d24, -, 140), + HEX_DBL(+, 1, 6a5bea046b42e, -, 139), + 
HEX_DBL(+, 1, ec7f3b269efa8, -, 138), + HEX_DBL(+, 1, 4eafb87eab0f2, -, 136), + HEX_DBL(+, 1, c6e2d05bbc, -, 135), + HEX_DBL(+, 1, 35208867c2683, -, 133), + HEX_DBL(+, 1, a425b317eeacd, -, 132), + HEX_DBL(+, 1, 1d8508fa8246a, -, 130), + HEX_DBL(+, 1, 840fbc08fdc8a, -, 129), + HEX_DBL(+, 1, 07b7112bc1ffe, -, 127), + HEX_DBL(+, 1, 666d0dad2961d, -, 126), + HEX_DBL(+, 1, e726c3f64d0fe, -, 125), + HEX_DBL(+, 1, 4b0dc07cabf98, -, 123), + HEX_DBL(+, 1, c1f2daf3b6a46, -, 122), + HEX_DBL(+, 1, 31c5957a47de2, -, 120), + HEX_DBL(+, 1, 9f96445648b9f, -, 119), + HEX_DBL(+, 1, 1a6baeadb4fd1, -, 117), + HEX_DBL(+, 1, 7fd974d372e45, -, 116), + HEX_DBL(+, 1, 04da4d1452919, -, 114), + HEX_DBL(+, 1, 62891f06b345, -, 113), + HEX_DBL(+, 1, e1dd273aa8a4a, -, 112), + HEX_DBL(+, 1, 4775e0840bfdd, -, 110), + HEX_DBL(+, 1, bd109d9d94bda, -, 109), + HEX_DBL(+, 1, 2e73f53fba844, -, 107), + HEX_DBL(+, 1, 9b138170d6bfe, -, 106), + HEX_DBL(+, 1, 175af0cf60ec5, -, 104), + HEX_DBL(+, 1, 7baee1bffa80b, -, 103), + HEX_DBL(+, 1, 02057d1245ceb, -, 101), + HEX_DBL(+, 1, 5eafffb34ba31, -, 100), + HEX_DBL(+, 1, dca23bae16424, -, 99), + HEX_DBL(+, 1, 43e7fc88b8056, -, 97), + HEX_DBL(+, 1, b83bf23a9a9eb, -, 96), + HEX_DBL(+, 1, 2b2b8dd05b318, -, 94), + HEX_DBL(+, 1, 969d47321e4cc, -, 93), + HEX_DBL(+, 1, 1452b7723aed2, -, 91), + HEX_DBL(+, 1, 778fe2497184c, -, 90), + HEX_DBL(+, 1, fe7116182e9cc, -, 89), + HEX_DBL(+, 1, 5ae191a99585a, -, 87), + HEX_DBL(+, 1, d775d87da854d, -, 86), + HEX_DBL(+, 1, 4063f8cc8bb98, -, 84), + HEX_DBL(+, 1, b374b315f87c1, -, 83), + HEX_DBL(+, 1, 27ec458c65e3c, -, 81), + HEX_DBL(+, 1, 923372c67a074, -, 80), + HEX_DBL(+, 1, 1152eaeb73c08, -, 78), + HEX_DBL(+, 1, 737c5645114b5, -, 77), + HEX_DBL(+, 1, f8e6c24b5592e, -, 76), + HEX_DBL(+, 1, 571db733a9d61, -, 74), + HEX_DBL(+, 1, d257d547e083f, -, 73), + HEX_DBL(+, 1, 3ce9b9de78f85, -, 71), + HEX_DBL(+, 1, aebabae3a41b5, -, 70), + HEX_DBL(+, 1, 24b6031b49bda, -, 68), + HEX_DBL(+, 1, 8dd5e1bb09d7e, -, 67), + HEX_DBL(+, 1, 
0e5b73d1ff53d, -, 65), + HEX_DBL(+, 1, 6f741de1748ec, -, 64), + HEX_DBL(+, 1, f36bd37f42f3e, -, 63), + HEX_DBL(+, 1, 536452ee2f75c, -, 61), + HEX_DBL(+, 1, cd480a1b7482, -, 60), + HEX_DBL(+, 1, 39792499b1a24, -, 58), + HEX_DBL(+, 1, aa0de4bf35b38, -, 57), + HEX_DBL(+, 1, 2188ad6ae3303, -, 55), + HEX_DBL(+, 1, 898471fca6055, -, 54), + HEX_DBL(+, 1, 0b6c3afdde064, -, 52), + HEX_DBL(+, 1, 6b7719a59f0e, -, 51), + HEX_DBL(+, 1, ee001eed62aa, -, 50), + HEX_DBL(+, 1, 4fb547c775da8, -, 48), + HEX_DBL(+, 1, c8464f7616468, -, 47), + HEX_DBL(+, 1, 36121e24d3bba, -, 45), + HEX_DBL(+, 1, a56e0c2ac7f75, -, 44), + HEX_DBL(+, 1, 1e642baeb84a, -, 42), + HEX_DBL(+, 1, 853f01d6d53ba, -, 41), + HEX_DBL(+, 1, 0885298767e9a, -, 39), + HEX_DBL(+, 1, 67852a7007e42, -, 38), + HEX_DBL(+, 1, e8a37a45fc32e, -, 37), + HEX_DBL(+, 1, 4c1078fe9228a, -, 35), + HEX_DBL(+, 1, c3527e433fab1, -, 34), + HEX_DBL(+, 1, 32b48bf117da2, -, 32), + HEX_DBL(+, 1, a0db0d0ddb3ec, -, 31), + HEX_DBL(+, 1, 1b48655f37267, -, 29), + HEX_DBL(+, 1, 81056ff2c5772, -, 28), + HEX_DBL(+, 1, 05a628c699fa1, -, 26), + HEX_DBL(+, 1, 639e3175a689d, -, 25), + HEX_DBL(+, 1, e355bbaee85cb, -, 24), + HEX_DBL(+, 1, 4875ca227ec38, -, 22), + HEX_DBL(+, 1, be6c6fdb01612, -, 21), + HEX_DBL(+, 1, 2f6053b981d98, -, 19), + HEX_DBL(+, 1, 9c54c3b43bc8b, -, 18), + HEX_DBL(+, 1, 18354238f6764, -, 16), + HEX_DBL(+, 1, 7cd79b5647c9b, -, 15), + HEX_DBL(+, 1, 02cf22526545a, -, 13), + HEX_DBL(+, 1, 5fc21041027ad, -, 12), + HEX_DBL(+, 1, de16b9c24a98f, -, 11), + HEX_DBL(+, 1, 44e51f113d4d6, -, 9), + HEX_DBL(+, 1, b993fe00d5376, -, 8), + HEX_DBL(+, 1, 2c155b8213cf4, -, 6), + HEX_DBL(+, 1, 97db0ccceb0af, -, 5), + HEX_DBL(+, 1, 152aaa3bf81cc, -, 3), + HEX_DBL(+, 1, 78b56362cef38, -, 2), + HEX_DBL(+, 1, 0, +, 0), + HEX_DBL(+, 1, 5bf0a8b145769, +, 1), + HEX_DBL(+, 1, d8e64b8d4ddae, +, 2), + HEX_DBL(+, 1, 415e5bf6fb106, +, 4), + HEX_DBL(+, 1, b4c902e273a58, +, 5), + HEX_DBL(+, 1, 28d389970338f, +, 7), + HEX_DBL(+, 1, 936dc5690c08f, +, 8), + HEX_DBL(+, 1, 
122885aaeddaa, +, 10), + HEX_DBL(+, 1, 749ea7d470c6e, +, 11), + HEX_DBL(+, 1, fa7157c470f82, +, 12), + HEX_DBL(+, 1, 5829dcf95056, +, 14), + HEX_DBL(+, 1, d3c4488ee4f7f, +, 15), + HEX_DBL(+, 1, 3de1654d37c9a, +, 17), + HEX_DBL(+, 1, b00b5916ac955, +, 18), + HEX_DBL(+, 1, 259ac48bf05d7, +, 20), + HEX_DBL(+, 1, 8f0ccafad2a87, +, 21), + HEX_DBL(+, 1, 0f2ebd0a8002, +, 23), + HEX_DBL(+, 1, 709348c0ea4f9, +, 24), + HEX_DBL(+, 1, f4f22091940bd, +, 25), + HEX_DBL(+, 1, 546d8f9ed26e1, +, 27), + HEX_DBL(+, 1, ceb088b68e804, +, 28), + HEX_DBL(+, 1, 3a6e1fd9eecfd, +, 30), + HEX_DBL(+, 1, ab5adb9c436, +, 31), + HEX_DBL(+, 1, 226af33b1fdc1, +, 33), + HEX_DBL(+, 1, 8ab7fb5475fb7, +, 34), + HEX_DBL(+, 1, 0c3d3920962c9, +, 36), + HEX_DBL(+, 1, 6c932696a6b5d, +, 37), + HEX_DBL(+, 1, ef822f7f6731d, +, 38), + HEX_DBL(+, 1, 50bba3796379a, +, 40), + HEX_DBL(+, 1, c9aae4631c056, +, 41), + HEX_DBL(+, 1, 370470aec28ed, +, 43), + HEX_DBL(+, 1, a6b765d8cdf6d, +, 44), + HEX_DBL(+, 1, 1f43fcc4b662c, +, 46), + HEX_DBL(+, 1, 866f34a725782, +, 47), + HEX_DBL(+, 1, 0953e2f3a1ef7, +, 49), + HEX_DBL(+, 1, 689e221bc8d5b, +, 50), + HEX_DBL(+, 1, ea215a1d20d76, +, 51), + HEX_DBL(+, 1, 4d13fbb1a001a, +, 53), + HEX_DBL(+, 1, c4b334617cc67, +, 54), + HEX_DBL(+, 1, 33a43d282a519, +, 56), + HEX_DBL(+, 1, a220d397972eb, +, 57), + HEX_DBL(+, 1, 1c25c88df6862, +, 59), + HEX_DBL(+, 1, 8232558201159, +, 60), + HEX_DBL(+, 1, 0672a3c9eb871, +, 62), + HEX_DBL(+, 1, 64b41c6d37832, +, 63), + HEX_DBL(+, 1, e4cf766fe49be, +, 64), + HEX_DBL(+, 1, 49767bc0483e3, +, 66), + HEX_DBL(+, 1, bfc951eb8bb76, +, 67), + HEX_DBL(+, 1, 304d6aeca254b, +, 69), + HEX_DBL(+, 1, 9d97010884251, +, 70), + HEX_DBL(+, 1, 19103e4080b45, +, 72), + HEX_DBL(+, 1, 7e013cd114461, +, 73), + HEX_DBL(+, 1, 03996528e074c, +, 75), + HEX_DBL(+, 1, 60d4f6fdac731, +, 76), + HEX_DBL(+, 1, df8c5af17ba3b, +, 77), + HEX_DBL(+, 1, 45e3076d61699, +, 79), + HEX_DBL(+, 1, baed16a6e0da7, +, 80), + HEX_DBL(+, 1, 2cffdfebde1a1, +, 82), + HEX_DBL(+, 1, 9919cabefcb69, 
+, 83), + HEX_DBL(+, 1, 160345c9953e3, +, 85), + HEX_DBL(+, 1, 79dbc9dc53c66, +, 86), + HEX_DBL(+, 1, 00c810d464097, +, 88), + HEX_DBL(+, 1, 5d009394c5c27, +, 89), + HEX_DBL(+, 1, da57de8f107a8, +, 90), + HEX_DBL(+, 1, 425982cf597cd, +, 92), + HEX_DBL(+, 1, b61e5ca3a5e31, +, 93), + HEX_DBL(+, 1, 29bb825dfcf87, +, 95), + HEX_DBL(+, 1, 94a90db0d6fe2, +, 96), + HEX_DBL(+, 1, 12fec759586fd, +, 98), + HEX_DBL(+, 1, 75c1dc469e3af, +, 99), + HEX_DBL(+, 1, fbfd219c43b04, +, 100), + HEX_DBL(+, 1, 5936d44e1a146, +, 102), + HEX_DBL(+, 1, d531d8a7ee79c, +, 103), + HEX_DBL(+, 1, 3ed9d24a2d51b, +, 105), + HEX_DBL(+, 1, b15cfe5b6e17b, +, 106), + HEX_DBL(+, 1, 268038c2c0e, +, 108), + HEX_DBL(+, 1, 9044a73545d48, +, 109), + HEX_DBL(+, 1, 1002ab6218b38, +, 111), + HEX_DBL(+, 1, 71b3540cbf921, +, 112), + HEX_DBL(+, 1, f6799ea9c414a, +, 113), + HEX_DBL(+, 1, 55779b984f3eb, +, 115), + HEX_DBL(+, 1, d01a210c44aa4, +, 116), + HEX_DBL(+, 1, 3b63da8e9121, +, 118), + HEX_DBL(+, 1, aca8d6b0116b8, +, 119), + HEX_DBL(+, 1, 234de9e0c74e9, +, 121), + HEX_DBL(+, 1, 8bec7503ca477, +, 122), + HEX_DBL(+, 1, 0d0eda9796b9, +, 124), + HEX_DBL(+, 1, 6db0118477245, +, 125), + HEX_DBL(+, 1, f1056dc7bf22d, +, 126), + HEX_DBL(+, 1, 51c2cc3433801, +, 128), + HEX_DBL(+, 1, cb108ffbec164, +, 129), + HEX_DBL(+, 1, 37f780991b584, +, 131), + HEX_DBL(+, 1, a801c0ea8ac4d, +, 132), + HEX_DBL(+, 1, 20247cc4c46c1, +, 134), + HEX_DBL(+, 1, 87a0553328015, +, 135), + HEX_DBL(+, 1, 0a233dee4f9bb, +, 137), + HEX_DBL(+, 1, 69b7f55b808ba, +, 138), + HEX_DBL(+, 1, eba064644060a, +, 139), + HEX_DBL(+, 1, 4e184933d9364, +, 141), + HEX_DBL(+, 1, c614fe2531841, +, 142), + HEX_DBL(+, 1, 3494a9b171bf5, +, 144), + HEX_DBL(+, 1, a36798b9d969b, +, 145), + HEX_DBL(+, 1, 1d03d8c0c04af, +, 147), + HEX_DBL(+, 1, 836026385c974, +, 148), + HEX_DBL(+, 1, 073fbe9ac901d, +, 150), + HEX_DBL(+, 1, 65cae0969f286, +, 151), + HEX_DBL(+, 1, e64a58639cae8, +, 152), + HEX_DBL(+, 1, 4a77f5f9b50f9, +, 154), + HEX_DBL(+, 1, c12744a3a28e3, +, 155), + 
HEX_DBL(+, 1, 313b3b6978e85, +, 157), + HEX_DBL(+, 1, 9eda3a31e587e, +, 158), + HEX_DBL(+, 1, 19ebe56b56453, +, 160), + HEX_DBL(+, 1, 7f2bc6e599b7e, +, 161), + HEX_DBL(+, 1, 04644610df2ff, +, 163), + HEX_DBL(+, 1, 61e8b490ac4e6, +, 164), + HEX_DBL(+, 1, e103201f299b3, +, 165), + HEX_DBL(+, 1, 46e1b637beaf5, +, 167), + HEX_DBL(+, 1, bc473cfede104, +, 168), + HEX_DBL(+, 1, 2deb1b9c85e2d, +, 170), + HEX_DBL(+, 1, 9a5981ca67d1, +, 171), + HEX_DBL(+, 1, 16dc8a9ef670b, +, 173), + HEX_DBL(+, 1, 7b03166942309, +, 174), + HEX_DBL(+, 1, 0190be03150a7, +, 176), + HEX_DBL(+, 1, 5e1152f9a8119, +, 177), + HEX_DBL(+, 1, dbca9263f8487, +, 178), + HEX_DBL(+, 1, 43556dee93bee, +, 180), + HEX_DBL(+, 1, b774c12967dfa, +, 181), + HEX_DBL(+, 1, 2aa4306e922c2, +, 183), + HEX_DBL(+, 1, 95e54c5dd4217, +, 184) + }; + + // scale by e**i -- (expm1(f) + 1)*e**i - 1 = expm1(f) * e**i + e**i - 1 = + // e**i + return exp_table[exponent + 150] + (f * exp_table[exponent + 150] - 1.0); +} + + +double reference_fmax(double x, double y) +{ + if (isnan(y)) return x; return x >= y ? x : y; } -double reference_fmin( double x, double y ) +double reference_fmin(double x, double y) { - if( isnan(y) ) - return x; + if (isnan(y)) return x; return x <= y ? 
x : y; } -double reference_hypot( double x, double y ) +double reference_hypot(double x, double y) { - // Since the inputs are actually floats, we don't have to worry about range here - if( isinf(x) || isinf(y) ) - return INFINITY; + // Since the inputs are actually floats, we don't have to worry about range + // here + if (isinf(x) || isinf(y)) return INFINITY; - return sqrt( x * x + y * y ); + return sqrt(x * x + y * y); } -int reference_ilogbl( long double x) +int reference_ilogbl(long double x) { extern int gDeviceILogb0, gDeviceILogbNaN; // Since we are just using this to verify double precision, we can // use the double precision ilogb here - union { double f; cl_ulong u;} u; - u.f = (double) x; + union { + double f; + cl_ulong u; + } u; + u.f = (double)x; int exponent = (int)(u.u >> 52) & 0x7ff; - if( exponent == 0x7ff ) + if (exponent == 0x7ff) { - if( u.u & 0x000fffffffffffffULL ) - return gDeviceILogbNaN; + if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN; return CL_INT_MAX; } - if( exponent == 0 ) - { // deal with denormals - u.f = x * HEX_DBL( +, 1, 0, +, 64 ); + if (exponent == 0) + { // deal with denormals + u.f = x * HEX_DBL(+, 1, 0, +, 64); exponent = (cl_uint)(u.u >> 52) & 0x7ff; - if( exponent == 0 ) - return gDeviceILogb0; + if (exponent == 0) return gDeviceILogb0; exponent -= 1023 + 64; return exponent; @@ -1522,84 +1694,105 @@ int reference_ilogbl( long double x) return exponent - 1023; } -//double reference_log2( double x ) -//{ -// return log( x ) * 1.44269504088896340735992468100189214; -//} +double reference_relaxed_log2(double x) { return reference_log2(x); } - -double reference_relaxed_log2( double x ) +double reference_log2(double x) { - return reference_log2(x); -} + if (isnan(x) || x < 0.0 || x == -INFINITY) return cl_make_nan(); -double reference_log2( double x ) -{ - if( isnan(x) || x < 0.0 || x == -INFINITY) - return cl_make_nan(); + if (x == 0.0f) return -INFINITY; - if( x == 0.0f) - return -INFINITY; - - if( x == INFINITY ) 
- return INFINITY; + if (x == INFINITY) return INFINITY; double hi, lo; - __log2_ep( &hi, &lo, x ); + __log2_ep(&hi, &lo, x); return hi; } -double reference_log1p( double x ) -{ // This function is suitable only for verifying log1pf(). It produces several double precision ulps of error. +double reference_log1p(double x) +{ // This function is suitable only for verifying log1pf(). It produces several + // double precision ulps of error. // Handle small and NaN - if( ! ( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 53 ) ) ) - return x; + if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 53))) return x; // deal with special values - if( x <= -1.0 ) + if (x <= -1.0) { - if( x < -1.0 ) - return cl_make_nan(); + if (x < -1.0) return cl_make_nan(); return -INFINITY; } // infinity - if( x == INFINITY ) - return INFINITY; + if (x == INFINITY) return INFINITY; - // High precision result for when near 0, to avoid problems with the reference result falling in the wrong binade. - if( reference_fabs(x) < HEX_DBL( +, 1, 0, -, 28 ) ) - return (1.0 - 0.5 * x) * x; + // High precision result for when near 0, to avoid problems with the + // reference result falling in the wrong binade. + if (reference_fabs(x) < HEX_DBL(+, 1, 0, -, 28)) return (1.0 - 0.5 * x) * x; // Our polynomial is only good in the region +-2**-4. 
// If we aren't in that range then we need to reduce to be in that range - double correctionLo = -0.0; // correction down stream to compensate for the reduction, if any - double correctionHi = -0.0; // correction down stream to compensate for the exponent, if any - if( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 4 ) ) + double correctionLo = + -0.0; // correction down stream to compensate for the reduction, if any + double correctionHi = + -0.0; // correction down stream to compensate for the exponent, if any + if (reference_fabs(x) > HEX_DBL(+, 1, 0, -, 4)) { - x += 1.0; // double should cover any loss of precision here + x += 1.0; // double should cover any loss of precision here // separate x into (1+f) * 2**i - union{ double d; cl_ulong u;} u; u.d = x; - int i = (int) ((u.u >> 52) & 0x7ff) - 1023; + union { + double d; + cl_ulong u; + } u; + u.d = x; + int i = (int)((u.u >> 52) & 0x7ff) - 1023; u.u &= 0x000fffffffffffffULL; - int index = (int) (u.u >> 48 ); + int index = (int)(u.u >> 48); u.u |= 0x3ff0000000000000ULL; double f = u.d; // further reduce f to be within 1/16 of 1.0 - static const double scale_table[16] = { 1.0, HEX_DBL( +, 1, d2d2d2d6e3f79, -, 1 ), HEX_DBL( +, 1, b8e38e42737a1, -, 1 ), HEX_DBL( +, 1, a1af28711adf3, -, 1 ), - HEX_DBL( +, 1, 8cccccd88dd65, -, 1 ), HEX_DBL( +, 1, 79e79e810ec8f, -, 1 ), HEX_DBL( +, 1, 68ba2e94df404, -, 1 ), HEX_DBL( +, 1, 590b216defb29, -, 1 ), - HEX_DBL( +, 1, 4aaaaab1500ed, -, 1 ), HEX_DBL( +, 1, 3d70a3e0d6f73, -, 1 ), HEX_DBL( +, 1, 313b13bb39f4f, -, 1 ), HEX_DBL( +, 1, 25ed09823f1cc, -, 1 ), - HEX_DBL( +, 1, 1b6db6e77457b, -, 1 ), HEX_DBL( +, 1, 11a7b96a3a34f, -, 1 ), HEX_DBL( +, 1, 0888888e46fea, -, 1 ), HEX_DBL( +, 1, 00000038e9862, -, 1 ) }; + static const double scale_table[16] = { + 1.0, + HEX_DBL(+, 1, d2d2d2d6e3f79, -, 1), + HEX_DBL(+, 1, b8e38e42737a1, -, 1), + HEX_DBL(+, 1, a1af28711adf3, -, 1), + HEX_DBL(+, 1, 8cccccd88dd65, -, 1), + HEX_DBL(+, 1, 79e79e810ec8f, -, 1), + HEX_DBL(+, 1, 68ba2e94df404, -, 1), + 
HEX_DBL(+, 1, 590b216defb29, -, 1), + HEX_DBL(+, 1, 4aaaaab1500ed, -, 1), + HEX_DBL(+, 1, 3d70a3e0d6f73, -, 1), + HEX_DBL(+, 1, 313b13bb39f4f, -, 1), + HEX_DBL(+, 1, 25ed09823f1cc, -, 1), + HEX_DBL(+, 1, 1b6db6e77457b, -, 1), + HEX_DBL(+, 1, 11a7b96a3a34f, -, 1), + HEX_DBL(+, 1, 0888888e46fea, -, 1), + HEX_DBL(+, 1, 00000038e9862, -, 1) + }; // correction_table[i] = -log( scale_table[i] ) - // All entries have >= 64 bits of precision (rather than the expected 53) - static const double correction_table[16] = { -0.0, HEX_DBL( +, 1, 7a5c722c16058, -, 4 ), HEX_DBL( +, 1, 323db16c89ab1, -, 3 ), HEX_DBL( +, 1, a0f87d180629, -, 3 ), - HEX_DBL( +, 1, 050279324e17c, -, 2 ), HEX_DBL( +, 1, 36f885bb270b0, -, 2 ), HEX_DBL( +, 1, 669b771b5cc69, -, 2 ), HEX_DBL( +, 1, 94203a6292a05, -, 2 ), - HEX_DBL( +, 1, bfb4f9cb333a4, -, 2 ), HEX_DBL( +, 1, e982376ddb80e, -, 2 ), HEX_DBL( +, 1, 08d5d8769b2b2, -, 1 ), HEX_DBL( +, 1, 1c288bc00e0cf, -, 1 ), - HEX_DBL( +, 1, 2ec7535b31ecb, -, 1 ), HEX_DBL( +, 1, 40bed0adc63fb, -, 1 ), HEX_DBL( +, 1, 521a5c0330615, -, 1 ), HEX_DBL( +, 1, 62e42f7dd092c, -, 1 ) }; + // All entries have >= 64 bits of precision (rather than the expected + // 53) + static const double correction_table[16] = { + -0.0, + HEX_DBL(+, 1, 7a5c722c16058, -, 4), + HEX_DBL(+, 1, 323db16c89ab1, -, 3), + HEX_DBL(+, 1, a0f87d180629, -, 3), + HEX_DBL(+, 1, 050279324e17c, -, 2), + HEX_DBL(+, 1, 36f885bb270b0, -, 2), + HEX_DBL(+, 1, 669b771b5cc69, -, 2), + HEX_DBL(+, 1, 94203a6292a05, -, 2), + HEX_DBL(+, 1, bfb4f9cb333a4, -, 2), + HEX_DBL(+, 1, e982376ddb80e, -, 2), + HEX_DBL(+, 1, 08d5d8769b2b2, -, 1), + HEX_DBL(+, 1, 1c288bc00e0cf, -, 1), + HEX_DBL(+, 1, 2ec7535b31ecb, -, 1), + HEX_DBL(+, 1, 40bed0adc63fb, -, 1), + HEX_DBL(+, 1, 521a5c0330615, -, 1), + HEX_DBL(+, 1, 62e42f7dd092c, -, 1) + }; f *= scale_table[index]; correctionLo = correction_table[index]; @@ -1611,17 +1804,25 @@ double reference_log1p( double x ) } - // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the 
range x = [-1/16, 1/16] + // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the range x = + // [-1/16, 1/16] // max error HEX_DBL( +, 1, 048f61f9a5eca, -, 52 ) - double p = HEX_DBL( -, 1, cc33de97a9d7b, -, 46 ) + - (HEX_DBL( -, 1, fffffffff3eb7, -, 2 ) + - (HEX_DBL( +, 1, 5555555633ef7, -, 2 ) + - (HEX_DBL( -, 1, 00000062c78, -, 2 ) + - (HEX_DBL( +, 1, 9999958a3321, -, 3 ) + - (HEX_DBL( -, 1, 55534ce65c347, -, 3 ) + - (HEX_DBL( +, 1, 24957208391a5, -, 3 ) + - (HEX_DBL( -, 1, 02287b9a5b4a1, -, 3 ) + - HEX_DBL( +, 1, c757d922180ed, -, 4 ) * x)*x)*x)*x)*x)*x)*x)*x; + double p = HEX_DBL(-, 1, cc33de97a9d7b, -, 46) + + (HEX_DBL(-, 1, fffffffff3eb7, -, 2) + + (HEX_DBL(+, 1, 5555555633ef7, -, 2) + + (HEX_DBL(-, 1, 00000062c78, -, 2) + + (HEX_DBL(+, 1, 9999958a3321, -, 3) + + (HEX_DBL(-, 1, 55534ce65c347, -, 3) + + (HEX_DBL(+, 1, 24957208391a5, -, 3) + + (HEX_DBL(-, 1, 02287b9a5b4a1, -, 3) + + HEX_DBL(+, 1, c757d922180ed, -, 4) * x) + * x) + * x) + * x) + * x) + * x) + * x) + * x; // log(x+1) = x * p(x) + x x += x * p; @@ -1629,22 +1830,23 @@ double reference_log1p( double x ) return correctionHi + (correctionLo + x); } -double reference_logb( double x ) +double reference_logb(double x) { - union { float f; cl_uint u;} u; - u.f = (float) x; + union { + float f; + cl_uint u; + } u; + u.f = (float)x; cl_int exponent = (u.u >> 23) & 0xff; - if( exponent == 0xff ) - return x * x; + if (exponent == 0xff) return x * x; - if( exponent == 0 ) - { // deal with denormals + if (exponent == 0) + { // deal with denormals u.u = (u.u & 0x007fffff) | 0x3f800000; u.f -= 1.0f; exponent = (u.u >> 23) & 0xff; - if( exponent == 0 ) - return -INFINITY; + if (exponent == 0) return -INFINITY; return exponent - (127 + 126); } @@ -1652,219 +1854,271 @@ double reference_logb( double x ) return exponent - 127; } -double reference_relaxed_reciprocal(double x) -{ - return 1.0f / ((float) x); -} +double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); } -double 
reference_reciprocal( double x ) -{ - return 1.0 / x; -} +double reference_reciprocal(double x) { return 1.0 / x; } -double reference_remainder( double x, double y ) +double reference_remainder(double x, double y) { int i; - return reference_remquo( x, y, &i ); -} - -double reference_lgamma( double x) -{ -/* - * ==================================================== - * This function is from fdlibm. http://www.netlib.org - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ - -static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ - half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ - one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ - pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ - a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ - a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ - a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */ - a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */ - a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */ - a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */ - a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */ - a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */ - a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */ - a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */ - a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */ - a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */ - tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */ - tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */ - /* tt = -(tail of tf) */ - tt = 
-3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */ - t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */ - t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */ - t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */ - t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */ - t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */ - t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */ - t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */ - t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */ - t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */ - t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */ - t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */ - t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */ - t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */ - t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */ - t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */ - u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ - u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */ - u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */ - u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */ - u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */ - u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */ - v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */ - v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */ - v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */ - v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */ - v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */ - s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ - s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */ - s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */ - s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */ - s4 = 
2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */ - s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */ - s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */ - r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */ - r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */ - r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */ - r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */ - r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */ - r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */ - w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */ - w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */ - w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */ - w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */ - w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ - w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ - w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ - - static const double zero= 0.00000000000000000000e+00; - double t,y,z,nadj,p,p1,p2,p3,q,r,w; - cl_int i,hx,lx,ix; - - union{ double d; cl_ulong u;}u; u.d = x; - - hx = (cl_int) (u.u >> 32); - lx = (cl_int) (u.u & 0xffffffffULL); + return reference_remquo(x, y, &i); +} + +double reference_lgamma(double x) +{ + /* + * ==================================================== + * This function is from fdlibm. http://www.netlib.org + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + * + */ + + static const double // two52 = 4.50359962737049600000e+15, /* 0x43300000, + // 0x00000000 */ + half = 5.00000000000000000000e-01, /* 0x3FE00000, + 0x00000000 */ + one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ + pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ + a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ + a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ + a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */ + a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */ + a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */ + a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */ + a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */ + a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */ + a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */ + a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */ + a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */ + a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */ + tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */ + tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */ + /* tt = -(tail of tf) */ + tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */ + t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */ + t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */ + t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */ + t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */ + t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */ + t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */ + t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */ + t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */ + t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */ + t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */ + 
t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */ + t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */ + t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */ + t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */ + t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */ + u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ + u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */ + u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */ + u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */ + u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */ + u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */ + v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */ + v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */ + v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */ + v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */ + v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */ + s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ + s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */ + s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */ + s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */ + s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */ + s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */ + s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */ + r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */ + r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */ + r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */ + r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */ + r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */ + r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */ + w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */ + w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */ + w2 = 
-2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */ + w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */ + w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ + w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ + w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + + static const double zero = 0.00000000000000000000e+00; + double t, y, z, nadj, p, p1, p2, p3, q, r, w; + cl_int i, hx, lx, ix; + + union { + double d; + cl_ulong u; + } u; + u.d = x; + + hx = (cl_int)(u.u >> 32); + lx = (cl_int)(u.u & 0xffffffffULL); /* purge off +-inf, NaN, +-0, and negative arguments */ -// *signgamp = 1; - ix = hx&0x7fffffff; - if(ix>=0x7ff00000) return x*x; - if((ix|lx)==0) return INFINITY; - if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ - if(hx<0) { -// *signgamp = -1; + // *signgamp = 1; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) return x * x; + if ((ix | lx) == 0) return INFINITY; + if (ix < 0x3b900000) + { /* |x|<2**-70, return -log(|x|) */ + if (hx < 0) + { + // *signgamp = -1; return -reference_log(-x); - } else return -reference_log(x); + } + else + return -reference_log(x); } - if(hx<0) { - if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ - return INFINITY; + if (hx < 0) + { + if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */ + return INFINITY; t = reference_sinpi(x); - if(t==zero) return INFINITY; /* -integer */ - nadj = reference_log(pi/reference_fabs(t*x)); -// if(t=0x3FE76944) {y = 1.0-x; i= 0;} - else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;} - else {y = x; i=2;} - } else { - r = zero; - if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */ - else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */ - else {y=x-one;i=2;} + else if (ix < 0x40000000) + { + if (ix <= 0x3feccccc) + { /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -reference_log(x); + if (ix >= 0x3FE76944) + { + y = 1.0 - x; + i = 0; + } + else if (ix >= 0x3FCDA661) + { + y = x - (tc - one); + i = 1; + } + else + { + y = x; + i = 2; + } } - 
switch(i) { - case 0: - z = y*y; - p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); - p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); - p = y*p1+p2; - r += (p-0.5*y); break; - case 1: - z = y*y; - w = z*y; - p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */ - p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); - p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); - p = z*p1-(tt-w*(p2+y*p3)); - r += (tf + p); break; - case 2: - p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); - p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); - r += (-0.5*y + p1/p2); + else + { + r = zero; + if (ix >= 0x3FFBB4C3) + { + y = 2.0 - x; + i = 0; + } /* [1.7316,2] */ + else if (ix >= 0x3FF3B4C4) + { + y = x - tc; + i = 1; + } /* [1.23,1.73] */ + else + { + y = x - one; + i = 2; + } + } + switch (i) + { + case 0: + z = y * y; + p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10)))); + p2 = z + * (a1 + + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11))))); + p = y * p1 + p2; + r += (p - 0.5 * y); + break; + case 1: + z = y * y; + w = z * y; + p1 = t0 + + w + * (t3 + + w * (t6 + w * (t9 + w * t12))); /* parallel comp */ + p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13))); + p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14))); + p = z * p1 - (tt - w * (p2 + y * p3)); + r += (tf + p); + break; + case 2: + p1 = y + * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5))))); + p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5)))); + r += (-0.5 * y + p1 / p2); } } - else if(ix<0x40200000) { /* x < 8.0 */ + else if (ix < 0x40200000) + { /* x < 8.0 */ i = (int)x; t = zero; - y = x-(double)i; - p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); - q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); - r = half*y+p/q; - z = one; /* lgamma(1+s) = log(s) + lgamma(s) */ - switch(i) { - case 7: z *= (y+6.0); /* FALLTHRU */ - case 6: z *= (y+5.0); /* FALLTHRU */ - case 5: z *= (y+4.0); /* FALLTHRU */ - case 4: z *= (y+3.0); /* FALLTHRU */ - case 3: z *= (y+2.0); /* FALLTHRU */ - r += reference_log(z); break; + y = x - (double)i; 
+ p = y + * (s0 + + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6)))))); + q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6))))); + r = half * y + p / q; + z = one; /* lgamma(1+s) = log(s) + lgamma(s) */ + switch (i) + { + case 7: z *= (y + 6.0); /* FALLTHRU */ + case 6: z *= (y + 5.0); /* FALLTHRU */ + case 5: z *= (y + 4.0); /* FALLTHRU */ + case 4: z *= (y + 3.0); /* FALLTHRU */ + case 3: + z *= (y + 2.0); /* FALLTHRU */ + r += reference_log(z); + break; } - /* 8.0 <= x < 2**58 */ - } else if (ix < 0x43900000) { + /* 8.0 <= x < 2**58 */ + } + else if (ix < 0x43900000) + { t = reference_log(x); - z = one/x; - y = z*z; - w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); - r = (x-half)*(t-one)+w; - } else - /* 2**58 <= x <= inf */ - r = x*(reference_log(x)-one); - if(hx<0) r = nadj - r; + z = one / x; + y = z * z; + w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6))))); + r = (x - half) * (t - one) + w; + } + else + /* 2**58 <= x <= inf */ + r = x * (reference_log(x) - one); + if (hx < 0) r = nadj - r; return r; - } #endif // _MSC_VER -double reference_assignment( double x ){ return x; } +double reference_assignment(double x) { return x; } -int reference_not( double x ) +int reference_not(double x) { - int r = !x; - return r; + int r = !x; + return r; } #pragma mark - #pragma mark Double testing #ifndef M_PIL - #define M_PIL 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L +#define M_PIL \ + 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L #endif -static long double reduce1l( long double x ); +static long double reduce1l(long double x); #ifdef __PPC__ // Since long double on PPC is really extended precision double arithmetic @@ -1873,36 +2127,35 @@ static long double reduce1l( long double x ); // such that reduction algorithm used for other architectures will not work. // Instead and alternate reduction method is used. 
-static long double reduce1l( long double x ) +static long double reduce1l(long double x) { - union { - long double ld; - double d[2]; - } u; + union { + long double ld; + double d[2]; + } u; - // Reduce the high and low halfs separately. - u.ld = x; - return ((long double)reduce1(u.d[0]) + reduce1(u.d[1])); + // Reduce the high and low halfs separately. + u.ld = x; + return ((long double)reduce1(u.d[0]) + reduce1(u.d[1])); } #else // !__PPC__ -static long double reduce1l( long double x ) +static long double reduce1l(long double x) { static long double unit_exp = 0; - if( 0.0L == unit_exp ) - unit_exp = scalbnl( 1.0L, LDBL_MANT_DIG); + if (0.0L == unit_exp) unit_exp = scalbnl(1.0L, LDBL_MANT_DIG); - if( reference_fabsl(x) >= unit_exp ) + if (reference_fabsl(x) >= unit_exp) { - if( reference_fabsl(x) == INFINITY ) - return cl_make_nan(); + if (reference_fabsl(x) == INFINITY) return cl_make_nan(); - return 0.0L; //we patch up the sign for sinPi and cosPi later, since they need different signs + return 0.0L; // we patch up the sign for sinPi and cosPi later, since + // they need different signs } // Find the nearest multiple of 2 - const long double r = reference_copysignl( unit_exp, x ); + const long double r = reference_copysignl(unit_exp, x); long double z = x + r; z -= r; @@ -1911,19 +2164,31 @@ static long double reduce1l( long double x ) } #endif // __PPC__ -long double reference_acospil( long double x){ return reference_acosl( x ) / M_PIL; } -long double reference_asinpil( long double x){ return reference_asinl( x ) / M_PIL; } -long double reference_atanpil( long double x){ return reference_atanl( x ) / M_PIL; } -long double reference_atan2pil( long double y, long double x){ return reference_atan2l( y, x) / M_PIL; } -long double reference_cospil( long double x) +long double reference_acospil(long double x) +{ + return reference_acosl(x) / M_PIL; +} +long double reference_asinpil(long double x) +{ + return reference_asinl(x) / M_PIL; +} +long double 
reference_atanpil(long double x) { - if( reference_fabsl(x) >= HEX_LDBL( +, 1, 0, +, 54 ) ) + return reference_atanl(x) / M_PIL; +} +long double reference_atan2pil(long double y, long double x) +{ + return reference_atan2l(y, x) / M_PIL; +} +long double reference_cospil(long double x) +{ + if (reference_fabsl(x) >= HEX_LDBL(+, 1, 0, +, 54)) { - if( reference_fabsl(x) == INFINITY ) - return cl_make_nan(); + if (reference_fabsl(x) == INFINITY) return cl_make_nan(); - //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53. - //However, when starting with single precision inputs, there will be no odd values. + // Note this probably fails for odd values between 0x1.0p52 and + // 0x1.0p53. However, when starting with single precision inputs, there + // will be no odd values. return 1.0L; } @@ -1935,9 +2200,9 @@ long double reference_cospil( long double x) // phase adjust double xhi = 0.0; double xlo = 0.0; - xhi = (double) x + 0.5; + xhi = (double)x + 0.5; - if(reference_fabsl(x) > 0.5L) + if (reference_fabsl(x) > 0.5L) { xlo = xhi - x; xlo = 0.5 - xlo; @@ -1949,61 +2214,69 @@ long double reference_cospil( long double x) } // reduce to [-0.5, 0.5] - if( xhi < -0.5 ) + if (xhi < -0.5) { xhi = -1.0 - xhi; xlo = -xlo; } - else if ( xhi > 0.5 ) + else if (xhi > 0.5) { xhi = 1.0 - xhi; xlo = -xlo; } // cosPi zeros are all +0 - if( xhi == 0.0 && xlo == 0.0 ) - return 0.0; + if (xhi == 0.0 && xlo == 0.0) return 0.0; xhi *= M_PI; xlo *= M_PI; xhi += xlo; - return reference_sinl( xhi ); + return reference_sinl(xhi); #else // phase adjust x += 0.5L; // reduce to [-0.5, 0.5] - if( x < -0.5L ) + if (x < -0.5L) x = -1.0L - x; - else if ( x > 0.5L ) + else if (x > 0.5L) x = 1.0L - x; // cosPi zeros are all +0 - if( x == 0.0L ) - return 0.0L; + if (x == 0.0L) return 0.0L; - return reference_sinl( x * M_PIL ); + return reference_sinl(x * M_PIL); #endif } -long double reference_dividel( long double x, long double y) +long double reference_dividel(long double x, long 
double y) { double dx = x; double dy = y; - return dx/dy; + return dx / dy; } -typedef struct{ double hi, lo; } double_double; +struct double_double +{ + double hi, lo; +}; -// Split doubles_double into a series of consecutive 26-bit precise doubles and a remainder. -// Note for later -- for multiplication, it might be better to split each double into a power of two and two 26 bit portions -// multiplication of a double double by a known power of two is cheap. The current approach causes some inexact arithmetic in mul_dd. -static inline void split_dd( double_double x, double_double *hi, double_double *lo ) +// Split doubles_double into a series of consecutive 26-bit precise doubles and +// a remainder. Note for later -- for multiplication, it might be better to +// split each double into a power of two and two 26 bit portions +// multiplication of a double double by a known power of +// two is cheap. The current approach causes some inexact +// arithmetic in mul_dd. +static inline void split_dd(double_double x, double_double *hi, + double_double *lo) { - union{ double d; cl_ulong u;}u; + union { + double d; + cl_ulong u; + } u; u.d = x.hi; u.u &= 0xFFFFFFFFF8000000ULL; hi->hi = u.d; @@ -2025,10 +2298,10 @@ static inline void split_dd( double_double x, double_double *hi, double_double * lo->lo = x.hi + x.lo; } -static inline double_double accum_d( double_double a, double b ) +static inline double_double accum_d(double_double a, double b) { double temp; - if( fabs(b) > fabs(a.hi) ) + if (fabs(b) > fabs(a.hi)) { temp = a.hi; a.hi += b; @@ -2041,47 +2314,45 @@ static inline double_double accum_d( double_double a, double b ) a.lo += b - (a.hi - temp); } - if( isnan( a.lo ) ) - a.lo = 0.0; + if (isnan(a.lo)) a.lo = 0.0; return a; } -static inline double_double add_dd( double_double a, double_double b ) +static inline double_double add_dd(double_double a, double_double b) { - double_double r = {-0.0 -0.0 }; + double_double r = { -0.0 - 0.0 }; - if( isinf(a.hi) || isinf( 
b.hi ) || - isnan(a.hi) || isnan( b.hi ) || - 0.0 == a.hi || 0.0 == b.hi ) + if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi + || 0.0 == b.hi) { r.hi = a.hi + b.hi; r.lo = a.lo + b.lo; - if( isnan( r.lo ) ) - r.lo = 0.0; + if (isnan(r.lo)) r.lo = 0.0; return r; } - //merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|, |b.hi| > |b.lo|, so we don't have to do the first merge pass + // merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|, + // |b.hi| > |b.lo|, so we don't have to do the first merge pass double terms[4] = { a.hi, b.hi, a.lo, b.lo }; double temp; - //Sort hi terms - if( fabs(terms[0]) < fabs(terms[1]) ) + // Sort hi terms + if (fabs(terms[0]) < fabs(terms[1])) { temp = terms[0]; terms[0] = terms[1]; terms[1] = temp; } - //sort lo terms - if( fabs(terms[2]) < fabs(terms[3]) ) + // sort lo terms + if (fabs(terms[2]) < fabs(terms[3])) { temp = terms[2]; terms[2] = terms[3]; terms[3] = temp; } // Fix case where small high term is less than large low term - if( fabs(terms[1]) < fabs(terms[2]) ) + if (fabs(terms[1]) < fabs(terms[2])) { temp = terms[1]; terms[1] = terms[2]; @@ -2104,111 +2375,96 @@ static inline double_double add_dd( double_double a, double_double b ) temp = r.hi; r.hi += r.lo; r.lo = r.lo - (r.hi - temp); - if( isnan( r.lo ) ) - r.lo = 0.0; + if (isnan(r.lo)) r.lo = 0.0; return r; } -static inline double_double mul_dd( double_double a, double_double b ) +static inline double_double mul_dd(double_double a, double_double b) { - double_double result = {-0.0,-0.0}; + double_double result = { -0.0, -0.0 }; // Inf, nan and 0 - if( isnan( a.hi ) || isnan( b.hi ) || - isinf( a.hi ) || isinf( b.hi ) || - 0.0 == a.hi || 0.0 == b.hi ) + if (isnan(a.hi) || isnan(b.hi) || isinf(a.hi) || isinf(b.hi) || 0.0 == a.hi + || 0.0 == b.hi) { result.hi = a.hi * b.hi; return result; } double_double ah, al, bh, bl; - split_dd( a, &ah, &al ); - split_dd( b, &bh, &bl ); - - double p0 = ah.hi * bh.hi; // 
exact (52 bits in product) 0 - double p1 = ah.hi * bh.lo; // exact (52 bits in product) 26 - double p2 = ah.lo * bh.hi; // exact (52 bits in product) 26 - double p3 = ah.lo * bh.lo; // exact (52 bits in product) 52 - double p4 = al.hi * bh.hi; // exact (52 bits in product) 52 - double p5 = al.hi * bh.lo; // exact (52 bits in product) 78 - double p6 = al.lo * bh.hi; // inexact (54 bits in product) 78 - double p7 = al.lo * bh.lo; // inexact (54 bits in product) 104 - double p8 = ah.hi * bl.hi; // exact (52 bits in product) 52 - double p9 = ah.hi * bl.lo; // inexact (54 bits in product) 78 - double pA = ah.lo * bl.hi; // exact (52 bits in product) 78 - double pB = ah.lo * bl.lo; // inexact (54 bits in product) 104 - double pC = al.hi * bl.hi; // exact (52 bits in product) 104 + split_dd(a, &ah, &al); + split_dd(b, &bh, &bl); + + double p0 = ah.hi * bh.hi; // exact (52 bits in product) 0 + double p1 = ah.hi * bh.lo; // exact (52 bits in product) 26 + double p2 = ah.lo * bh.hi; // exact (52 bits in product) 26 + double p3 = ah.lo * bh.lo; // exact (52 bits in product) 52 + double p4 = al.hi * bh.hi; // exact (52 bits in product) 52 + double p5 = al.hi * bh.lo; // exact (52 bits in product) 78 + double p6 = al.lo * bh.hi; // inexact (54 bits in product) 78 + double p7 = al.lo * bh.lo; // inexact (54 bits in product) 104 + double p8 = ah.hi * bl.hi; // exact (52 bits in product) 52 + double p9 = ah.hi * bl.lo; // inexact (54 bits in product) 78 + double pA = ah.lo * bl.hi; // exact (52 bits in product) 78 + double pB = ah.lo * bl.lo; // inexact (54 bits in product) 104 + double pC = al.hi * bl.hi; // exact (52 bits in product) 104 // the last 3 terms are two low to appear in the result - // accumulate from bottom up -#if 0 - // works but slow - result.hi = pC; - result = accum_d( result, pB ); - result = accum_d( result, p7 ); - result = accum_d( result, pA ); - result = accum_d( result, p9 ); - result = accum_d( result, p6 ); - result = accum_d( result, p5 ); - result = 
accum_d( result, p8 ); - result = accum_d( result, p4 ); - result = accum_d( result, p3 ); - result = accum_d( result, p2 ); - result = accum_d( result, p1 ); - result = accum_d( result, p0 ); - - // canonicalize the result - double temp = result.hi; - result.hi += result.lo; - result.lo -= (result.hi - temp); - if( isnan( result.lo ) ) - result.lo = 0.0; - - return result; -#else - // take advantage of the known relative magnitudes of the partial products to avoid some sorting - // Combine 2**-78 and 2**-104 terms. Here we are a bit sloppy about canonicalizing the double_doubles + // take advantage of the known relative magnitudes of the partial products + // to avoid some sorting Combine 2**-78 and 2**-104 terms. Here we are a bit + // sloppy about canonicalizing the double_doubles double_double t0 = { pA, pC }; double_double t1 = { p9, pB }; double_double t2 = { p6, p7 }; double temp0, temp1, temp2; - t0 = accum_d( t0, p5 ); // there is an extra 2**-78 term to deal with - - // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the double_doubles - temp0 = t0.hi; temp1 = t1.hi; temp2 = t2.hi; - t0.hi += p3; t1.hi += p4; t2.hi += p8; - temp0 -= t0.hi-p3; temp1 -= t1.hi-p4; temp2 -= t2.hi - p8; - t0.lo += temp0; t1.lo += temp1; t2.lo += temp2; - - // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the double_doubles - temp1 = t1.hi; temp2 = t2.hi; - t1.hi += p1; t2.hi += p2; - temp1 -= t1.hi-p1; temp2 -= t2.hi - p2; - t1.lo += temp1; t2.lo += temp2; + t0 = accum_d(t0, p5); // there is an extra 2**-78 term to deal with + + // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the + // double_doubles + temp0 = t0.hi; + temp1 = t1.hi; + temp2 = t2.hi; + t0.hi += p3; + t1.hi += p4; + t2.hi += p8; + temp0 -= t0.hi - p3; + temp1 -= t1.hi - p4; + temp2 -= t2.hi - p8; + t0.lo += temp0; + t1.lo += temp1; + t2.lo += temp2; + + // Add in 2**-26 terms. 
Here we are a bit sloppy about canonicalizing the + // double_doubles + temp1 = t1.hi; + temp2 = t2.hi; + t1.hi += p1; + t2.hi += p2; + temp1 -= t1.hi - p1; + temp2 -= t2.hi - p2; + t1.lo += temp1; + t2.lo += temp2; // Combine accumulators to get the low bits of result - t1 = add_dd( t1, add_dd( t2, t0 ) ); + t1 = add_dd(t1, add_dd(t2, t0)); // Add in MSB's, and round to precision - return accum_d( t1, p0 ); // canonicalizes -#endif - + return accum_d(t1, p0); // canonicalizes } -long double reference_exp10l( long double z ) +long double reference_exp10l(long double z) { - const double_double log2_10 = { HEX_DBL( +, 1, a934f0979a371, +, 1 ), HEX_DBL( +, 1, 7f2495fb7fa6d, -, 53 ) }; + const double_double log2_10 = { HEX_DBL(+, 1, a934f0979a371, +, 1), + HEX_DBL(+, 1, 7f2495fb7fa6d, -, 53) }; double_double x; int j; // Handle NaNs - if( isnan(z) ) - return z; + if (isnan(z)) return z; // init x x.hi = z; @@ -2217,172 +2473,193 @@ long double reference_exp10l( long double z ) // 10**x = exp2( x * log2(10) ) - x = mul_dd( x, log2_10); // x * log2(10) + x = mul_dd(x, log2_10); // x * log2(10) - //Deal with overflow and underflow for exp2(x) stage next - if( x.hi >= 1025 ) - return INFINITY; + // Deal with overflow and underflow for exp2(x) stage next + if (x.hi >= 1025) return INFINITY; - if( x.hi < -1075-24 ) - return +0.0; + if (x.hi < -1075 - 24) return +0.0; // find nearest integer to x - int i = (int) rint(x.hi); + int i = (int)rint(x.hi); // x now holds fractional part. The result would be then 2**i * exp2( x ) x.hi -= i; - // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5]. - // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different - // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits - // of precision. 
Instead we further reduce the range to [-1/32, 1/32] by observing that + // We could attempt to find a minimax polynomial for exp2(x) over the range + // x = [-0.5, 0.5]. However, this would converge very slowly near the + // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby + // requiring something like a 20th order polynomial to get 53 + 24 bits of + // precision. Instead we further reduce the range to [-1/32, 1/32] by + // observing that // // 2**(a+b) = 2**a * 2**b // - // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range - // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x. - const double_double corrections[17] = - { - { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) }, - { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) }, - { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) }, - { HEX_DBL( +, 1, 9c49182a3f09, -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) }, - { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) }, - { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) }, - { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) }, - { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) }, - { HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ) }, - { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) }, - { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) }, - { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) }, - { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) }, - { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) }, - { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) }, - { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 
6324c054647ad, -, 54 ) }, - { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) } + // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and + // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest + // value of n/16 from x. + const double_double corrections[17] = { + { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1), + HEX_DBL(-, 1, bdd3413b26456, -, 55) }, + { HEX_DBL(+, 1, 7a11473eb0187, -, 1), + HEX_DBL(-, 1, 41577ee04992f, -, 56) }, + { HEX_DBL(+, 1, 8ace5422aa0db, -, 1), + HEX_DBL(+, 1, 6e9f156864b27, -, 55) }, + { HEX_DBL(+, 1, 9c49182a3f09, -, 1), + HEX_DBL(+, 1, c7c46b071f2be, -, 57) }, + { HEX_DBL(+, 1, ae89f995ad3ad, -, 1), + HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) }, + { HEX_DBL(+, 1, c199bdd85529c, -, 1), + HEX_DBL(+, 1, 11065895048dd, -, 56) }, + { HEX_DBL(+, 1, d5818dcfba487, -, 1), + HEX_DBL(+, 1, 2ed02d75b3707, -, 56) }, + { HEX_DBL(+, 1, ea4afa2a490da, -, 1), + HEX_DBL(-, 1, e9c23179c2893, -, 55) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, 0b5586cf9890f, +, 0), + HEX_DBL(+, 1, 8a62e4adc610b, -, 54) }, + { HEX_DBL(+, 1, 172b83c7d517b, +, 0), + HEX_DBL(-, 1, 19041b9d78a76, -, 55) }, + { HEX_DBL(+, 1, 2387a6e756238, +, 0), + HEX_DBL(+, 1, 9b07eb6c70573, -, 54) }, + { HEX_DBL(+, 1, 306fe0a31b715, +, 0), + HEX_DBL(+, 1, 6f46ad23182e4, -, 55) }, + { HEX_DBL(+, 1, 3dea64c123422, +, 0), + HEX_DBL(+, 1, ada0911f09ebc, -, 55) }, + { HEX_DBL(+, 1, 4bfdad5362a27, +, 0), + HEX_DBL(+, 1, d4397afec42e2, -, 56) }, + { HEX_DBL(+, 1, 5ab07dd485429, +, 0), + HEX_DBL(+, 1, 6324c054647ad, -, 54) }, + { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0), + HEX_DBL(-, 1, bdd3413b26456, -, 54) } }; - int index = (int) rint( x.hi * 16.0 ); - x.hi -= (double) index * 0.0625; + int index = (int)rint(x.hi * 16.0); + x.hi -= (double)index * 0.0625; // canonicalize x double temp = x.hi; x.hi += x.lo; x.lo -= x.hi - temp; - // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. 
Max Error: 2 * 0x1.e112p-87 - const double_double c[] = { - {HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )}, - {HEX_DBL( +, 1, ebfbdff82c58f, -, 3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )}, - {HEX_DBL( +, 1, c6b08d704a0c, -, 5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )}, - {HEX_DBL( +, 1, 3b2ab6fba4e7a, -, 7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )}, - {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )}, - {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )}, - {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )}, - {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )}, - {HEX_DBL( +, 1, b52502b5e953, -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )} - }; - size_t count = sizeof( c ) / sizeof( c[0] ); + // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. Max + // Error: 2 * 0x1.e112p-87 + const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1), + HEX_DBL(+, 1, abc9e3ac1d244, -, 56) }, + { HEX_DBL(+, 1, ebfbdff82c58f, -, 3), + HEX_DBL(-, 1, 5e4987a631846, -, 57) }, + { HEX_DBL(+, 1, c6b08d704a0c, -, 5), + HEX_DBL(-, 1, d323200a05713, -, 59) }, + { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7), + HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) }, + { HEX_DBL(+, 1, 5d87fe78a672a, -, 10), + HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) }, + { HEX_DBL(+, 1, 430912f7e8373, -, 13), + HEX_DBL(+, 1, 4f1b59514a326, -, 67) }, + { HEX_DBL(+, 1, ffcbfc5985e71, -, 17), + HEX_DBL(-, 1, db7d6a0953b78, -, 71) }, + { HEX_DBL(+, 1, 62c150eb16465, -, 20), + HEX_DBL(+, 1, e0767c2d7abf5, -, 80) }, + { HEX_DBL(+, 1, b52502b5e953, -, 24), + HEX_DBL(+, 1, 6797523f944bc, -, 78) } }; + size_t count = sizeof(c) / sizeof(c[0]); // Do polynomial - double_double r = c[count-1]; - for( j = (int) count-2; j >= 0; j-- ) - r = add_dd( c[j], mul_dd( r, x ) ); + double_double r = c[count - 1]; + for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x)); // unwind 
approximation - r = mul_dd( r, x ); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 + r = mul_dd(r, x); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above // exp2(x) = (r + 1) * correction = r * correction + correction - r = mul_dd( r, corrections[index+8] ); - r = add_dd( r, corrections[index+8] ); + r = mul_dd(r, corrections[index + 8]); + r = add_dd(r, corrections[index + 8]); -// Format result for output: + // Format result for output: // Get mantissa - long double m = ((long double) r.hi + (long double) r.lo ); + long double m = ((long double)r.hi + (long double)r.lo); // Handle a pesky overflow cases when long double = double - if( i > 512 ) + if (i > 512) { - m *= HEX_DBL( +, 1, 0, +, 512 ); + m *= HEX_DBL(+, 1, 0, +, 512); i -= 512; } - else if( i < -512 ) + else if (i < -512) { - m *= HEX_DBL( +, 1, 0, -, 512 ); + m *= HEX_DBL(+, 1, 0, -, 512); i += 512; } - return m * ldexpl( 1.0L, i ); + return m * ldexpl(1.0L, i); } -static double fallback_frexp( double x, int *iptr ) +static double fallback_frexp(double x, int *iptr) { cl_ulong u, v; double fu, fv; - memcpy( &u, &x, sizeof(u)); + memcpy(&u, &x, sizeof(u)); - cl_ulong exponent = u & 0x7ff0000000000000ULL; + cl_ulong exponent = u & 0x7ff0000000000000ULL; cl_ulong mantissa = u & ~0x7ff0000000000000ULL; // add 1 to the exponent exponent += 0x0010000000000000ULL; - if( (cl_long) exponent < (cl_long) 0x0020000000000000LL ) + if ((cl_long)exponent < (cl_long)0x0020000000000000LL) { // subnormal, NaN, Inf mantissa |= 0x3fe0000000000000ULL; v = mantissa & 0xfff0000000000000ULL; u = mantissa; - memcpy( &fv, &v, sizeof(v)); - memcpy( &fu, &u, sizeof(u)); + memcpy(&fv, &v, sizeof(v)); + memcpy(&fu, &u, sizeof(u)); fu -= fv; - memcpy( &v, &fv, sizeof(v)); - memcpy( &u, &fu, sizeof(u)); + memcpy(&v, &fv, sizeof(v)); + memcpy(&u, &fu, sizeof(u)); - exponent = u & 0x7ff0000000000000ULL; + exponent = u & 0x7ff0000000000000ULL; mantissa = u & 
~0x7ff0000000000000ULL; - *iptr = (exponent >> 52) + (-1022 + 1 -1022); + *iptr = (exponent >> 52) + (-1022 + 1 - 1022); u = mantissa | 0x3fe0000000000000ULL; - memcpy( &fu, &u, sizeof(u)); + memcpy(&fu, &u, sizeof(u)); return fu; } *iptr = (exponent >> 52) - 1023; u = mantissa | 0x3fe0000000000000ULL; - memcpy( &fu, &u, sizeof(u)); + memcpy(&fu, &u, sizeof(u)); return fu; } // Assumes zeros, infinities and NaNs handed elsewhere -static inline int extract( double x, cl_ulong *mant ); -static inline int extract( double x, cl_ulong *mant ) +static inline int extract(double x, cl_ulong *mant) { - static double (*frexpp)(double, int*) = NULL; + static double (*frexpp)(double, int *) = NULL; int e; // verify that frexp works properly - if( NULL == frexpp ) + if (NULL == frexpp) { - if( 0.5 == frexp( HEX_DBL( +, 1, 0, -, 1030 ), &e ) && e == -1029 ) + if (0.5 == frexp(HEX_DBL(+, 1, 0, -, 1030), &e) && e == -1029) frexpp = frexp; else frexpp = fallback_frexp; } - *mant = (cl_ulong) (HEX_DBL( +, 1, 0, +, 64 ) * fabs( frexpp( x, &e ))); + *mant = (cl_ulong)(HEX_DBL(+, 1, 0, +, 64) * fabs(frexpp(x, &e))); return e - 1; } // Return 128-bit product of a*b as (hi << 64) + lo -static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo ); -static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo ) +static inline void mul128(cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo) { cl_ulong alo = a & 0xffffffffULL; cl_ulong ahi = a >> 32; @@ -2393,92 +2670,90 @@ static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo ) cl_ulong ahiblo = ahi * blo; cl_ulong ahibhi = ahi * bhi; - alobhi += (aloblo >> 32) + (ahiblo & 0xffffffffULL); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1) = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 - *hi = ahibhi + (alobhi >> 32) + (ahiblo >> 32); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1) = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 + alobhi += (aloblo >> 32) + + (ahiblo + & 0xffffffffULL); // cannot 
overflow: (2^32-1)^2 + 2 * (2^32-1) = + // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 + *hi = ahibhi + (alobhi >> 32) + + (ahiblo >> 32); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1) = + // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 *lo = (aloblo & 0xffffffffULL) | (alobhi << 32); } -// Move the most significant non-zero bit to the MSB -// Note: not general. Only works if the most significant non-zero bit is at MSB-1 -static inline void renormalize( cl_ulong *hi, cl_ulong *lo, int *exponent ) +static double round_to_nearest_even_double(cl_ulong hi, cl_ulong lo, + int exponent) { - if( 0 == (0x8000000000000000ULL & *hi )) - { - *hi <<= 1; - *hi |= *lo >> 63; - *lo <<= 1; - *exponent -= 1; - } -} - -static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent ); -static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent ) -{ - union{ cl_ulong u; cl_double d;} u; + union { + cl_ulong u; + cl_double d; + } u; // edges - if( exponent > 1023 ) return INFINITY; - if( exponent == -1075 && (hi | (lo!=0)) > 0x8000000000000000ULL ) - return HEX_DBL( +, 1, 0, -, 1074 ); - if( exponent <= -1075 ) return 0.0; + if (exponent > 1023) return INFINITY; + if (exponent == -1075 && (hi | (lo != 0)) > 0x8000000000000000ULL) + return HEX_DBL(+, 1, 0, -, 1074); + if (exponent <= -1075) return 0.0; - //Figure out which bits go where + // Figure out which bits go where int shift = 11; - if( exponent < -1022 ) + if (exponent < -1022) { - shift -= 1022 + exponent; // subnormal: shift is not 52 - exponent = -1023; // set exponent to 0 + shift -= 1022 + exponent; // subnormal: shift is not 52 + exponent = -1023; // set exponent to 0 } else - hi &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + hi &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove + // it. 
// Assemble the double (round toward zero) - u.u = (hi >> shift) | ((cl_ulong) (exponent + 1023) << 52); + u.u = (hi >> shift) | ((cl_ulong)(exponent + 1023) << 52); // put a representation of the residual bits into hi - hi <<= (64-shift); + hi <<= (64 - shift); hi |= lo >> shift; - lo <<= (64-shift ); + lo <<= (64 - shift); hi |= lo != 0; - //round to nearest, ties to even - if( hi < 0x8000000000000000ULL ) return u.d; - if( hi == 0x8000000000000000ULL ) u.u += u.u & 1ULL; - else u.u++; + // round to nearest, ties to even + if (hi < 0x8000000000000000ULL) return u.d; + if (hi == 0x8000000000000000ULL) + u.u += u.u & 1ULL; + else + u.u++; return u.d; } -// Shift right. Bits lost on the right will be OR'd together and OR'd with the LSB -static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift ); -static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift ) +// Shift right. Bits lost on the right will be OR'd together and OR'd with the +// LSB +static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift) { cl_ulong sticky = 0; cl_ulong h = *hi; cl_ulong l = *lo; - if( shift >= 64 ) + if (shift >= 64) { shift -= 64; sticky = 0 != lo; l = h; h = 0; - if( shift >= 64 ) + if (shift >= 64) { sticky |= (0 != l); l = 0; } else { - sticky |= (0 != (l << (64-shift))); + sticky |= (0 != (l << (64 - shift))); l >>= shift; } } else { - sticky |= (0 != (l << (64-shift))); + sticky |= (0 != (l << (64 - shift))); l >>= shift; - l |= h << (64-shift); + l |= h << (64 - shift); h >>= shift; } @@ -2487,9 +2762,10 @@ static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift } // 128-bit add of ((*hi << 64) + *lo) + ((chi << 64) + clo) -// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd with the LSB -static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exp ); -static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong 
clo, int *exponent ) +// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd +// with the LSB +static inline void add128(cl_ulong *hi, cl_ulong *lo, cl_ulong chi, + cl_ulong clo, int *exponent) { cl_ulong carry, carry2; // extended precision add @@ -2497,15 +2773,16 @@ static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong cl chi = add_carry(*hi, chi, &carry2); chi = add_carry(chi, carry, &carry); - //If we overflowed the 128 bit result - if( carry || carry2 ) + // If we overflowed the 128 bit result + if (carry || carry2) { - carry = clo & 1; // set aside low bit - clo >>= 1; // right shift low 1 - clo |= carry; // or back in the low bit, so we don't come to believe this is an exact half way case for rounding - clo |= chi << 63; // move lowest high bit into highest bit of lo - chi >>= 1; // right shift hi - chi |= 0x8000000000000000ULL; // move the carry bit into hi. + carry = clo & 1; // set aside low bit + clo >>= 1; // right shift low 1 + clo |= carry; // or back in the low bit, so we don't come to believe + // this is an exact half way case for rounding + clo |= chi << 63; // move lowest high bit into highest bit of lo + chi >>= 1; // right shift hi + chi |= 0x8000000000000000ULL; // move the carry bit into hi. 
*exponent = *exponent + 1; } @@ -2514,48 +2791,49 @@ static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong cl } // 128-bit subtract of ((chi << 64) + clo) - ((*hi << 64) + *lo) -static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC ); -static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC ) +static inline void sub128(cl_ulong *chi, cl_ulong *clo, cl_ulong hi, + cl_ulong lo, cl_ulong *signC, int *expC) { cl_ulong rHi = *chi; cl_ulong rLo = *clo; cl_ulong carry, carry2; - //extended precision subtract + // extended precision subtract rLo = sub_carry(rLo, lo, &carry); rHi = sub_carry(rHi, hi, &carry2); rHi = sub_carry(rHi, carry, &carry); // Check for sign flip - if( carry || carry2 ) + if (carry || carry2) { *signC ^= 0x8000000000000000ULL; - //negate rLo, rHi: -x = (x ^ -1) + 1 + // negate rLo, rHi: -x = (x ^ -1) + 1 rLo ^= -1ULL; rHi ^= -1ULL; rLo++; rHi += 0 == rLo; } - // normalize -- move the most significant non-zero bit to the MSB, and adjust exponent accordingly - if( rHi == 0 ) + // normalize -- move the most significant non-zero bit to the MSB, and + // adjust exponent accordingly + if (rHi == 0) { rHi = rLo; *expC = *expC - 64; rLo = 0; } - if( rHi ) + if (rHi) { int shift = 32; cl_ulong test = 1ULL << 32; - while( 0 == (rHi & 0x8000000000000000ULL)) + while (0 == (rHi & 0x8000000000000000ULL)) { - if( rHi < test ) + if (rHi < test) { rHi <<= shift; - rHi |= rLo >> (64-shift); + rHi |= rLo >> (64 - shift); rLo <<= shift; *expC = *expC - shift; } @@ -2565,7 +2843,7 @@ static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong l } else { - //zero + // zero *expC = INT_MIN; *signC = 0; } @@ -2575,7 +2853,7 @@ static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong l *clo = rLo; } -long double reference_fmal( long double x, long double y, long double z) +long double 
reference_fmal(long double x, long double y, long double z) { static const cl_ulong kMSB = 0x8000000000000000ULL; @@ -2585,75 +2863,91 @@ long double reference_fmal( long double x, long double y, long double z) double c = z; // Make bits accessible - union{ cl_ulong u; cl_double d; } ua; ua.d = a; - union{ cl_ulong u; cl_double d; } ub; ub.d = b; - union{ cl_ulong u; cl_double d; } uc; uc.d = c; + union { + cl_ulong u; + cl_double d; + } ua; + ua.d = a; + union { + cl_ulong u; + cl_double d; + } ub; + ub.d = b; + union { + cl_ulong u; + cl_double d; + } uc; + uc.d = c; // deal with Nans, infinities and zeros - if( isnan( a ) || isnan( b ) || isnan(c) || - isinf( a ) || isinf( b ) || isinf(c) || - 0 == ( ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior - 0 == ( ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior - 0 == ( uc.u & ~kMSB) ) // c == 0, defeat host FTZ behavior + if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c) + || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior + 0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior + 0 == (uc.u & ~kMSB)) // c == 0, defeat host FTZ behavior { - if( isinf( c ) && !isinf(a) && !isinf(b) ) - return (c + a) + b; + if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b; - a = (double) reference_multiplyl( a, b ); // some risk that the compiler will insert a non-compliant fma here on some platforms. - return reference_addl(a, c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. + a = (double)reference_multiplyl( + a, b); // some risk that the compiler will insert a non-compliant + // fma here on some platforms. + return reference_addl( + a, + c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. 
} // extract exponent and mantissa // exponent is a standard unbiased signed integer // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB cl_ulong mantA, mantB, mantC; - int expA = extract( a, &mantA ); - int expB = extract( b, &mantB ); - int expC = extract( c, &mantC ); - cl_ulong signC = uc.u & kMSB; // We'll need the sign bit of C later to decide if we are adding or subtracting + int expA = extract(a, &mantA); + int expB = extract(b, &mantB); + int expC = extract(c, &mantC); + cl_ulong signC = uc.u & kMSB; // We'll need the sign bit of C later to + // decide if we are adding or subtracting -// exact product of A and B + // exact product of A and B int exponent = expA + expB; cl_ulong sign = (ua.u ^ ub.u) & kMSB; cl_ulong hi, lo; - mul128( mantA, mantB, &hi, &lo ); + mul128(mantA, mantB, &hi, &lo); // renormalize - if( 0 == (kMSB & hi) ) + if (0 == (kMSB & hi)) { hi <<= 1; hi |= lo >> 63; lo <<= 1; } else - exponent++; // 2**63 * 2**63 gives 2**126. If the MSB was set, then our exponent increased. + exponent++; // 2**63 * 2**63 gives 2**126. If the MSB was set, then our + // exponent increased. 
-//infinite precision add + // infinite precision add cl_ulong chi = mantC; cl_ulong clo = 0; - if( exponent >= expC ) + if (exponent >= expC) { // Normalize C relative to the product - if( exponent > expC ) - shift_right_sticky_128( &chi, &clo, exponent - expC ); + if (exponent > expC) + shift_right_sticky_128(&chi, &clo, exponent - expC); // Add - if( sign ^ signC ) - sub128( &hi, &lo, chi, clo, &sign, &exponent ); + if (sign ^ signC) + sub128(&hi, &lo, chi, clo, &sign, &exponent); else - add128( &hi, &lo, chi, clo, &exponent ); + add128(&hi, &lo, chi, clo, &exponent); } else { // Shift the product relative to C so that their exponents match - shift_right_sticky_128( &hi, &lo, expC - exponent ); + shift_right_sticky_128(&hi, &lo, expC - exponent); // add - if( sign ^ signC ) - sub128( &chi, &clo, hi, lo, &signC, &expC ); + if (sign ^ signC) + sub128(&chi, &clo, hi, lo, &signC, &expC); else - add128( &chi, &clo, hi, lo, &expC ); + add128(&chi, &clo, hi, lo, &expC); hi = chi; lo = clo; @@ -2671,61 +2965,49 @@ long double reference_fmal( long double x, long double y, long double z) } +long double reference_madl(long double a, long double b, long double c) +{ + return a * b + c; +} +long double reference_recipl(long double x) { return 1.0L / x; } -long double reference_madl( long double a, long double b, long double c) { return a * b + c; } - -//long double my_nextafterl(long double x, long double y){ return (long double) nextafter( (double) x, (double) y ); } - -long double reference_recipl( long double x){ return 1.0L / x; } - -long double reference_rootnl( long double x, int i) +long double reference_rootnl(long double x, int i) { - double hi, lo; - long double l; - //rootn ( x, 0 ) returns a NaN. - if( 0 == i ) - return cl_make_nan(); + // rootn ( x, 0 ) returns a NaN. + if (0 == i) return cl_make_nan(); - //rootn ( x, n ) returns a NaN for x < 0 and n is even. 
- if( x < 0.0L && 0 == (i&1) ) - return cl_make_nan(); + // rootn ( x, n ) returns a NaN for x < 0 and n is even. + if (x < 0.0L && 0 == (i & 1)) return cl_make_nan(); - if( isinf(x) ) + if (isinf(x)) { - if( i < 0 ) - return reference_copysignl(0.0L, x); + if (i < 0) return reference_copysignl(0.0L, x); return x; } - if( x == 0.0 ) + if (x == 0.0) { - switch( i & 0x80000001 ) + switch (i & 0x80000001) { - //rootn ( +-0, n ) is +0 for even n > 0. - case 0: - return 0.0L; + // rootn ( +-0, n ) is +0 for even n > 0. + case 0: return 0.0L; - //rootn ( +-0, n ) is +-0 for odd n > 0. - case 1: - return x; + // rootn ( +-0, n ) is +-0 for odd n > 0. + case 1: return x; - //rootn ( +-0, n ) is +inf for even n < 0. - case 0x80000000: - return INFINITY; + // rootn ( +-0, n ) is +inf for even n < 0. + case 0x80000000: return INFINITY; - //rootn ( +-0, n ) is +-inf for odd n < 0. - case 0x80000001: - return copysign(INFINITY, x); + // rootn ( +-0, n ) is +-inf for odd n < 0. + case 0x80000001: return copysign(INFINITY, x); } } - if( i == 1 ) - return x; + if (i == 1) return x; - if( i == -1 ) - return 1.0 / x; + if (i == -1) return 1.0 / x; long double sign = x; x = reference_fabsl(x); @@ -2733,197 +3015,186 @@ long double reference_rootnl( long double x, int i) DivideDD(&iHi, &iLo, 1.0, i); x = reference_powl(x, iHi) * reference_powl(x, iLo); - return reference_copysignl( x, sign ); - + return reference_copysignl(x, sign); } -long double reference_rsqrtl( long double x){ return 1.0L / sqrtl(x); } -//long double reference_sincosl( long double x, long double *c ){ *c = reference_cosl(x); return reference_sinl(x); } -long double reference_sinpil( long double x) +long double reference_rsqrtl(long double x) { return 1.0L / sqrtl(x); } + +long double reference_sinpil(long double x) { double r = reduce1l(x); // reduce to [-0.5, 0.5] - if( r < -0.5L ) + if (r < -0.5L) r = -1.0L - r; - else if ( r > 0.5L ) + else if (r > 0.5L) r = 1.0L - r; // sinPi zeros have the same sign as x - 
if( r == 0.0L ) - return reference_copysignl(0.0L, x); + if (r == 0.0L) return reference_copysignl(0.0L, x); - return reference_sinl( r * M_PIL ); + return reference_sinl(r * M_PIL); } -long double reference_tanpil( long double x) +long double reference_tanpil(long double x) { // set aside the sign (allows us to preserve sign of -0) - long double sign = reference_copysignl( 1.0L, x); + long double sign = reference_copysignl(1.0L, x); long double z = reference_fabsl(x); // if big and even -- caution: only works if x only has single precision - if( z >= HEX_LDBL( +, 1, 0, +, 53 ) ) + if (z >= HEX_LDBL(+, 1, 0, +, 53)) { - if( z == INFINITY ) - return x - x; // nan + if (z == INFINITY) return x - x; // nan - return reference_copysignl( 0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. + return reference_copysignl( + 0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. } // reduce to the range [ -0.5, 0.5 ] - long double nearest = reference_rintl( z ); // round to nearest even places n + 0.5 values in the right place for us - int64_t i = (int64_t) nearest; // test above against 0x1.0p53 avoids overflow here + long double nearest = + reference_rintl(z); // round to nearest even places n + 0.5 values in + // the right place for us + int64_t i = + (int64_t)nearest; // test above against 0x1.0p53 avoids overflow here z -= nearest; - //correction for odd integer x for the right sign of zero - if( (i&1) && z == 0.0L ) - sign = -sign; + // correction for odd integer x for the right sign of zero + if ((i & 1) && z == 0.0L) sign = -sign; // track changes to the sign - sign *= reference_copysignl(1.0L, z); // really should just be an xor - z = reference_fabsl(z); // remove the sign again + sign *= reference_copysignl(1.0L, z); // really should just be an xor + z = reference_fabsl(z); // remove the sign again // reduce once more - // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly - if( z > 0.25L ) + 
// If we don't do this, rounding error in z * M_PI will cause us not to + // return infinities properly + if (z > 0.25L) { z = 0.5L - z; - return sign / reference_tanl( z * M_PIL ); // use system tan to get the right result + return sign + / reference_tanl(z + * M_PIL); // use system tan to get the right result } // - return sign * reference_tanl( z * M_PIL ); // use system tan to get the right result + return sign + * reference_tanl(z * M_PIL); // use system tan to get the right result } -long double reference_pownl( long double x, int i ){ return reference_powl( x, (long double) i ); } +long double reference_pownl(long double x, int i) +{ + return reference_powl(x, (long double)i); +} -long double reference_powrl( long double x, long double y ) +long double reference_powrl(long double x, long double y) { - //powr ( x, y ) returns NaN for x < 0. - if( x < 0.0L ) - return cl_make_nan(); + // powr ( x, y ) returns NaN for x < 0. + if (x < 0.0L) return cl_make_nan(); - //powr ( x, NaN ) returns the NaN for x >= 0. - //powr ( NaN, y ) returns the NaN. - if( isnan(x) || isnan(y) ) - return x + y; // Note: behavior different here than for pow(1,NaN), pow(NaN, 0) + // powr ( x, NaN ) returns the NaN for x >= 0. + // powr ( NaN, y ) returns the NaN. + if (isnan(x) || isnan(y)) + return x + y; // Note: behavior different here than for pow(1,NaN), + // pow(NaN, 0) - if( x == 1.0L ) + if (x == 1.0L) { - //powr ( +1, +-inf ) returns NaN. - if( reference_fabsl(y) == INFINITY ) - return cl_make_nan(); + // powr ( +1, +-inf ) returns NaN. + if (reference_fabsl(y) == INFINITY) return cl_make_nan(); - //powr ( +1, y ) is 1 for finite y. (NaN handled above) + // powr ( +1, y ) is 1 for finite y. (NaN handled above) return 1.0L; } - if( y == 0.0L ) + if (y == 0.0L) { - //powr ( +inf, +-0 ) returns NaN. - //powr ( +-0, +-0 ) returns NaN. - if( x == 0.0L || x == INFINITY ) - return cl_make_nan(); + // powr ( +inf, +-0 ) returns NaN. + // powr ( +-0, +-0 ) returns NaN. 
+ if (x == 0.0L || x == INFINITY) return cl_make_nan(); - //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) + // powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already + // handled above) return 1.0L; } - if( x == 0.0L ) + if (x == 0.0L) { - //powr ( +-0, -inf) is +inf. - //powr ( +-0, y ) is +inf for finite y < 0. - if( y < 0.0L ) - return INFINITY; + // powr ( +-0, -inf) is +inf. + // powr ( +-0, y ) is +inf for finite y < 0. + if (y < 0.0L) return INFINITY; - //powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) + // powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) return 0.0L; } - return reference_powl( x, y ); + return reference_powl(x, y); } -//long double my_fdiml( long double x, long double y){ return fdim( (double) x, (double) y ); } -long double reference_addl( long double x, long double y) +long double reference_addl(long double x, long double y) { - volatile double a = (double) x; - volatile double b = (double) y; + volatile double a = (double)x; + volatile double b = (double)y; -#if defined( __SSE2__ ) +#if defined(__SSE2__) // defeat x87 - __m128d va = _mm_set_sd( (double) a ); - __m128d vb = _mm_set_sd( (double) b ); - va = _mm_add_sd( va, vb ); - _mm_store_sd( (double*) &a, va ); + __m128d va = _mm_set_sd((double)a); + __m128d vb = _mm_set_sd((double)b); + va = _mm_add_sd(va, vb); + _mm_store_sd((double *)&a, va); #else a += b; #endif - return (long double) a; + return (long double)a; } -long double reference_subtractl( long double x, long double y) +long double reference_subtractl(long double x, long double y) { - volatile double a = (double) x; - volatile double b = (double) y; + volatile double a = (double)x; + volatile double b = (double)y; -#if defined( __SSE2__ ) +#if defined(__SSE2__) // defeat x87 - __m128d va = _mm_set_sd( (double) a ); - __m128d vb = _mm_set_sd( (double) b ); - va = _mm_sub_sd( va, vb ); - _mm_store_sd( (double*) &a, va ); + __m128d va = _mm_set_sd((double)a); + 
__m128d vb = _mm_set_sd((double)b); + va = _mm_sub_sd(va, vb); + _mm_store_sd((double *)&a, va); #else a -= b; #endif - return (long double) a; + return (long double)a; } -long double reference_multiplyl( long double x, long double y) +long double reference_multiplyl(long double x, long double y) { - volatile double a = (double) x; - volatile double b = (double) y; + volatile double a = (double)x; + volatile double b = (double)y; -#if defined( __SSE2__ ) +#if defined(__SSE2__) // defeat x87 - __m128d va = _mm_set_sd( (double) a ); - __m128d vb = _mm_set_sd( (double) b ); - va = _mm_mul_sd( va, vb ); - _mm_store_sd( (double*) &a, va ); + __m128d va = _mm_set_sd((double)a); + __m128d vb = _mm_set_sd((double)b); + va = _mm_mul_sd(va, vb); + _mm_store_sd((double *)&a, va); #else a *= b; #endif - return (long double) a; + return (long double)a; } -/*long double my_remquol( long double x, long double y, int *iptr ) -{ - if( isnan(x) || isnan(y) || - fabs(x) == INFINITY || - y == 0.0 ) - { - *iptr = 0; - return NAN; - } - - return remquo( (double) x, (double) y, iptr ); -}*/ -long double reference_lgamma_rl( long double x, int *signp ) +long double reference_lgamma_rl(long double x, int *signp) { -// long double lgamma_val = (long double)reference_lgamma( (double)x ); -// *signp = signgam; *signp = 0; return x; } - -int reference_isequall( long double x, long double y){ return x == y; } -int reference_isfinitel( long double x){ return 0 != isfinite(x); } -int reference_isgreaterl( long double x, long double y){ return x > y; } -int reference_isgreaterequall( long double x, long double y){ return x >= y; } -int reference_isinfl( long double x){ return 0 != isinf(x); } -int reference_islessl( long double x, long double y){ return x < y; } -int reference_islessequall( long double x, long double y){ return x <= y; } +int reference_isequall(long double x, long double y) { return x == y; } +int reference_isfinitel(long double x) { return 0 != isfinite(x); } +int 
reference_isgreaterl(long double x, long double y) { return x > y; } +int reference_isgreaterequall(long double x, long double y) { return x >= y; } +int reference_isinfl(long double x) { return 0 != isinf(x); } +int reference_islessl(long double x, long double y) { return x < y; } +int reference_islessequall(long double x, long double y) { return x <= y; } #if defined(__INTEL_COMPILER) int reference_islessgreaterl(long double x, long double y) { @@ -2935,69 +3206,76 @@ int reference_islessgreaterl(long double x, long double y) return 0 != islessgreater(x, y); } #endif -int reference_isnanl( long double x){ return 0 != isnan( x ); } -int reference_isnormall( long double x){ return 0 != isnormal( (double) x ); } -int reference_isnotequall( long double x, long double y){ return x != y; } -int reference_isorderedl( long double x, long double y){ return x == x && y == y; } -int reference_isunorderedl( long double x, long double y){ return isnan(x) || isnan( y ); } -#if defined( __INTEL_COMPILER ) -int reference_signbitl( long double x){ return 0 != signbitl( x ); } +int reference_isnanl(long double x) { return 0 != isnan(x); } +int reference_isnormall(long double x) { return 0 != isnormal((double)x); } +int reference_isnotequall(long double x, long double y) { return x != y; } +int reference_isorderedl(long double x, long double y) +{ + return x == x && y == y; +} +int reference_isunorderedl(long double x, long double y) +{ + return isnan(x) || isnan(y); +} +#if defined(__INTEL_COMPILER) +int reference_signbitl(long double x) { return 0 != signbitl(x); } #else -int reference_signbitl( long double x){ return 0 != signbit( x ); } +int reference_signbitl(long double x) { return 0 != signbit(x); } #endif -long double reference_copysignl( long double x, long double y); -long double reference_roundl( long double x ); +long double reference_copysignl(long double x, long double y); +long double reference_roundl(long double x); long double reference_cbrtl(long double x); -long 
double reference_copysignl( long double x, long double y ) +long double reference_copysignl(long double x, long double y) { - // We hope that the long double to double conversion proceeds with sign fidelity, - // even for zeros and NaNs - union{ double d; cl_ulong u;}u; u.d = (double) y; + // We hope that the long double to double conversion proceeds with sign + // fidelity, even for zeros and NaNs + union { + double d; + cl_ulong u; + } u; + u.d = (double)y; x = reference_fabsl(x); - if( u.u >> 63 ) - x = -x; + if (u.u >> 63) x = -x; return x; } -long double reference_roundl( long double x ) +long double reference_roundl(long double x) { // Since we are just using this to verify double precision, we can // use the double precision copysign here #if defined(__MINGW32__) && defined(__x86_64__) long double absx = reference_fabsl(x); - if (absx < 0.5L) - return reference_copysignl(0.0L, x); + if (absx < 0.5L) return reference_copysignl(0.0L, x); #endif - return round( (double) x ); + return round((double)x); } -long double reference_truncl( long double x ) +long double reference_truncl(long double x) { // Since we are just using this to verify double precision, we can // use the double precision copysign here - return trunc( (double) x ); + return trunc((double)x); } static long double reference_scalblnl(long double x, long n); long double reference_cbrtl(long double x) { - double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); - double ylo = HEX_DBL( +, 1, 558, -, 56 ); + double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); + double ylo = HEX_DBL(+, 1, 558, -, 56); - double fabsx = reference_fabs( x ); + double fabsx = reference_fabs(x); - if( isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x) ) - return x; + if (isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x)) return x; - double iy = 0.0; double log2x_hi, log2x_lo; - // extended precision log .... accurate to at least 64-bits + couple of guard bits + // extended precision log .... 
accurate to at least 64-bits + couple of + // guard bits __log2_ep(&log2x_hi, &log2x_lo, fabsx); double ylog2x_hi, ylog2x_lo; @@ -3009,79 +3287,44 @@ long double reference_cbrtl(long double x) MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo); long double powxy; - if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) { - powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY; - } else { + if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) + { + powxy = + reference_signbit(ylog2x_hi) ? HEX_DBL(+, 0, 0, +, 0) : INFINITY; + } + else + { // separate integer + fractional part long int m = lrint(ylog2x_hi); AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0); // revert to long double arithemtic - long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo; - powxy = reference_exp2l( ylog2x ); + long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo; + powxy = reference_exp2l(ylog2x); powxy = reference_scalblnl(powxy, m); } - return reference_copysignl( powxy, x ); -} - -/* -long double scalbnl( long double x, int i ) -{ - //suitable for checking double precision scalbn only - - if( i > 3000 ) - return copysignl( INFINITY, x); - if( i < -3000 ) - return copysignl( 0.0L, x); - - if( i > 0 ) - { - while( i >= 1000 ) - { - x *= HEX_LDBL( +, 1, 0, +, 1000 ); - i -= 1000; - } - - union{ cl_ulong u; double d;}u; - u.u = (cl_ulong)( i + 1023 ) << 52; - x *= (long double) u.d; - } - else if( i < 0 ) - { - while( i <= -1000 ) - { - x *= HEX_LDBL( +, 1, 0, -, 1000 ); - i += 1000; - } - - union{ cl_ulong u; double d;}u; - u.u = (cl_ulong)( i + 1023 ) << 52; - x *= (long double) u.d; - } - - return x; + return reference_copysignl(powxy, x); } -*/ -long double reference_rintl( long double x ) +long double reference_rintl(long double x) { #if defined(__PPC__) - // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined - // mantissa can represent more than LDBL_MANT_DIG binary digits. 
- x = rintl(x); + // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined + // mantissa can represent more than LDBL_MANT_DIG binary digits. + x = rintl(x); #else - static long double magic[2] = { 0.0L, 0.0L}; + static long double magic[2] = { 0.0L, 0.0L }; - if( 0.0L == magic[0] ) + if (0.0L == magic[0]) { magic[0] = scalbnl(0.5L, LDBL_MANT_DIG); magic[1] = scalbnl(-0.5L, LDBL_MANT_DIG); } - if( reference_fabsl(x) < magic[0] && x != 0.0L ) + if (reference_fabsl(x) < magic[0] && x != 0.0L) { - long double m = magic[ x < 0 ]; + long double m = magic[x < 0]; x += m; x -= m; } @@ -3094,7 +3337,7 @@ long double reference_rintl( long double x ) static void __sqrt_ep(double *rhi, double *rlo, double xhi, double xlo) { // approximate reciprocal sqrt - double thi = 1.0 / sqrt( xhi ); + double thi = 1.0 / sqrt(xhi); double tlo = 0.0; // One newton iteration in double-double @@ -3108,34 +3351,31 @@ static void __sqrt_ep(double *rhi, double *rlo, double xhi, double xlo) MulDD(rhi, rlo, yhi, ylo, xhi, xlo); } -long double reference_acoshl( long double x ) -{ -/* - * ==================================================== - * This function derived from fdlibm http://www.netlib.org - * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - */ - if( isnan(x) || isinf(x)) - return x + fabsl(x); - - if( x < 1.0L ) - return cl_make_nan(); +long double reference_acoshl(long double x) +{ + /* + * ==================================================== + * This function derived from fdlibm http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. 
+ * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ + if (isnan(x) || isinf(x)) return x + fabsl(x); - if( x == 1.0L ) - return 0.0L; + if (x < 1.0L) return cl_make_nan(); + + if (x == 1.0L) return 0.0L; - if( x > HEX_LDBL( +, 1, 0, +, 60 ) ) + if (x > HEX_LDBL(+, 1, 0, +, 60)) return reference_logl(x) + 0.693147180559945309417232121458176568L; - if( x > 2.0L ) - return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x*x - 1.0L))); + if (x > 2.0L) + return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x * x - 1.0L))); double hi, lo; MulD(&hi, &lo, x, x); @@ -3144,286 +3384,290 @@ long double reference_acoshl( long double x ) AddDD(&hi, &lo, hi, lo, x, 0.0); double correction = lo / hi; __log2_ep(&hi, &lo, hi); - double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ); - double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 ); + double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1); + double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56); MulDD(&hi, &lo, hi, lo, log2Hi, log2Lo); AddDD(&hi, &lo, hi, lo, correction, 0.0); return hi + lo; } -long double reference_asinhl( long double x ) +long double reference_asinhl(long double x) { long double cutoff = 0.0L; - const long double ln2 = HEX_LDBL( +, b, 17217f7d1cf79ab, -, 4 ); + const long double ln2 = HEX_LDBL(+, b, 17217f7d1cf79ab, -, 4); - if( cutoff == 0.0L ) - cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG); + if (cutoff == 0.0L) cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG); - if( isnan(x) || isinf(x) ) - return x + x; + if (isnan(x) || isinf(x)) return x + x; long double absx = reference_fabsl(x); - if( absx < cutoff ) - return x; + if (absx < cutoff) return x; long double sign = reference_copysignl(1.0L, x); - if( absx <= 4.0/3.0 ) { - return sign * reference_log1pl( absx + x*x / (1.0 + sqrtl(1.0 + x*x))); + if (absx <= 4.0 / 3.0) + { + return sign + * reference_log1pl(absx + x * x 
/ (1.0 + sqrtl(1.0 + x * x))); } - else if( absx <= HEX_LDBL( +, 1, 0, +, 27 ) ) { - return sign * reference_logl( 2.0L * absx + 1.0L / (sqrtl( x * x + 1.0 ) + absx)); + else if (absx <= HEX_LDBL(+, 1, 0, +, 27)) + { + return sign + * reference_logl(2.0L * absx + 1.0L / (sqrtl(x * x + 1.0) + absx)); } - else { - return sign * ( reference_logl( absx ) + ln2 ); + else + { + return sign * (reference_logl(absx) + ln2); } } -long double reference_atanhl( long double x ) +long double reference_atanhl(long double x) { -/* - * ==================================================== - * This function is from fdlibm: http://www.netlib.org - * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - if( isnan(x) ) - return x + x; + /* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + if (isnan(x)) return x + x; - long double signed_half = reference_copysignl( 0.5L, x ); + long double signed_half = reference_copysignl(0.5L, x); x = reference_fabsl(x); - if( x > 1.0L ) - return cl_make_nan(); + if (x > 1.0L) return cl_make_nan(); - if( x < 0.5L ) - return signed_half * reference_log1pl( 2.0L * ( x + x*x / (1-x) ) ); + if (x < 0.5L) + return signed_half * reference_log1pl(2.0L * (x + x * x / (1 - x))); - return signed_half * reference_log1pl(2.0L * x / (1-x)); + return signed_half * reference_log1pl(2.0L * x / (1 - x)); } -long double reference_exp2l( long double z) +long double reference_exp2l(long double z) { double_double x; int j; // Handle NaNs - if( isnan(z) ) - return z; + if (isnan(z)) return z; // init x x.hi = z; x.lo = z - x.hi; - //Deal with overflow and underflow for exp2(x) stage next - if( x.hi >= 1025 ) - return INFINITY; + // Deal with overflow and underflow for exp2(x) stage next + if (x.hi >= 1025) return INFINITY; - if( x.hi < -1075-24 ) - return +0.0; + if (x.hi < -1075 - 24) return +0.0; // find nearest integer to x - int i = (int) rint(x.hi); + int i = (int)rint(x.hi); // x now holds fractional part. The result would be then 2**i * exp2( x ) x.hi -= i; - // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5]. - // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different - // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits - // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that + // We could attempt to find a minimax polynomial for exp2(x) over the range + // x = [-0.5, 0.5]. However, this would converge very slowly near the + // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby + // requiring something like a 20th order polynomial to get 53 + 24 bits of + // precision. 
Instead we further reduce the range to [-1/32, 1/32] by + // observing that // // 2**(a+b) = 2**a * 2**b // - // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range - // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x. - const double_double corrections[17] = - { - { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) }, - { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) }, - { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) }, - { HEX_DBL( +, 1, 9c49182a3f09, -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) }, - { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) }, - { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) }, - { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) }, - { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) }, - { HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ) }, - { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) }, - { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) }, - { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) }, - { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) }, - { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) }, - { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) }, - { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) }, - { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) } + // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and + // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest + // value of n/16 from x. 
+ const double_double corrections[17] = { + { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1), + HEX_DBL(-, 1, bdd3413b26456, -, 55) }, + { HEX_DBL(+, 1, 7a11473eb0187, -, 1), + HEX_DBL(-, 1, 41577ee04992f, -, 56) }, + { HEX_DBL(+, 1, 8ace5422aa0db, -, 1), + HEX_DBL(+, 1, 6e9f156864b27, -, 55) }, + { HEX_DBL(+, 1, 9c49182a3f09, -, 1), + HEX_DBL(+, 1, c7c46b071f2be, -, 57) }, + { HEX_DBL(+, 1, ae89f995ad3ad, -, 1), + HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) }, + { HEX_DBL(+, 1, c199bdd85529c, -, 1), + HEX_DBL(+, 1, 11065895048dd, -, 56) }, + { HEX_DBL(+, 1, d5818dcfba487, -, 1), + HEX_DBL(+, 1, 2ed02d75b3707, -, 56) }, + { HEX_DBL(+, 1, ea4afa2a490da, -, 1), + HEX_DBL(-, 1, e9c23179c2893, -, 55) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, 0b5586cf9890f, +, 0), + HEX_DBL(+, 1, 8a62e4adc610b, -, 54) }, + { HEX_DBL(+, 1, 172b83c7d517b, +, 0), + HEX_DBL(-, 1, 19041b9d78a76, -, 55) }, + { HEX_DBL(+, 1, 2387a6e756238, +, 0), + HEX_DBL(+, 1, 9b07eb6c70573, -, 54) }, + { HEX_DBL(+, 1, 306fe0a31b715, +, 0), + HEX_DBL(+, 1, 6f46ad23182e4, -, 55) }, + { HEX_DBL(+, 1, 3dea64c123422, +, 0), + HEX_DBL(+, 1, ada0911f09ebc, -, 55) }, + { HEX_DBL(+, 1, 4bfdad5362a27, +, 0), + HEX_DBL(+, 1, d4397afec42e2, -, 56) }, + { HEX_DBL(+, 1, 5ab07dd485429, +, 0), + HEX_DBL(+, 1, 6324c054647ad, -, 54) }, + { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0), + HEX_DBL(-, 1, bdd3413b26456, -, 54) } }; - int index = (int) rint( x.hi * 16.0 ); - x.hi -= (double) index * 0.0625; + int index = (int)rint(x.hi * 16.0); + x.hi -= (double)index * 0.0625; // canonicalize x double temp = x.hi; x.hi += x.lo; x.lo -= x.hi - temp; - // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. 
Max Error: 2 * 0x1.e112p-87 - const double_double c[] = { - {HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )}, - {HEX_DBL( +, 1, ebfbdff82c58f, -, 3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )}, - {HEX_DBL( +, 1, c6b08d704a0c, -, 5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )}, - {HEX_DBL( +, 1, 3b2ab6fba4e7a, -, 7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )}, - {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )}, - {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )}, - {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )}, - {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )}, - {HEX_DBL( +, 1, b52502b5e953, -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )} - }; - size_t count = sizeof( c ) / sizeof( c[0] ); + // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. Max + // Error: 2 * 0x1.e112p-87 + const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1), + HEX_DBL(+, 1, abc9e3ac1d244, -, 56) }, + { HEX_DBL(+, 1, ebfbdff82c58f, -, 3), + HEX_DBL(-, 1, 5e4987a631846, -, 57) }, + { HEX_DBL(+, 1, c6b08d704a0c, -, 5), + HEX_DBL(-, 1, d323200a05713, -, 59) }, + { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7), + HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) }, + { HEX_DBL(+, 1, 5d87fe78a672a, -, 10), + HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) }, + { HEX_DBL(+, 1, 430912f7e8373, -, 13), + HEX_DBL(+, 1, 4f1b59514a326, -, 67) }, + { HEX_DBL(+, 1, ffcbfc5985e71, -, 17), + HEX_DBL(-, 1, db7d6a0953b78, -, 71) }, + { HEX_DBL(+, 1, 62c150eb16465, -, 20), + HEX_DBL(+, 1, e0767c2d7abf5, -, 80) }, + { HEX_DBL(+, 1, b52502b5e953, -, 24), + HEX_DBL(+, 1, 6797523f944bc, -, 78) } }; + size_t count = sizeof(c) / sizeof(c[0]); // Do polynomial - double_double r = c[count-1]; - for( j = (int) count-2; j >= 0; j-- ) - r = add_dd( c[j], mul_dd( r, x ) ); + double_double r = c[count - 1]; + for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x)); // unwind 
approximation - r = mul_dd( r, x ); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 + r = mul_dd(r, x); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above // exp2(x) = (r + 1) * correction = r * correction + correction - r = mul_dd( r, corrections[index+8] ); - r = add_dd( r, corrections[index+8] ); + r = mul_dd(r, corrections[index + 8]); + r = add_dd(r, corrections[index + 8]); -// Format result for output: + // Format result for output: // Get mantissa - long double m = ((long double) r.hi + (long double) r.lo ); + long double m = ((long double)r.hi + (long double)r.lo); // Handle a pesky overflow cases when long double = double - if( i > 512 ) + if (i > 512) { - m *= HEX_DBL( +, 1, 0, +, 512 ); + m *= HEX_DBL(+, 1, 0, +, 512); i -= 512; } - else if( i < -512 ) + else if (i < -512) { - m *= HEX_DBL( +, 1, 0, -, 512 ); + m *= HEX_DBL(+, 1, 0, -, 512); i += 512; } - return m * ldexpl( 1.0L, i ); + return m * ldexpl(1.0L, i); } -long double reference_expm1l( long double x) +long double reference_expm1l(long double x) { -#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER ) - //unimplemented +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // unimplemented return x; #else - union { double f; cl_ulong u;} u; - u.f = (double) x; + if (reference_isnanl(x)) return x; - if (reference_isnanl(x)) - return x; - - if ( x > 710 ) - return INFINITY; + if (x > 710) return INFINITY; long double y = expm1l(x); // Range of expm1l is -1.0L to +inf. Negative inf // on a few Linux platforms is clearly the wrong sign. - if (reference_isinfl(y)) - y = INFINITY; + if (reference_isinfl(y)) y = INFINITY; return y; #endif } -long double reference_fmaxl( long double x, long double y ) +long double reference_fmaxl(long double x, long double y) { - if( isnan(y) ) - return x; + if (isnan(y)) return x; return x >= y ? 
x : y; } -long double reference_fminl( long double x, long double y ) +long double reference_fminl(long double x, long double y) { - if( isnan(y) ) - return x; + if (isnan(y)) return x; return x <= y ? x : y; } -long double reference_hypotl( long double x, long double y ) +long double reference_hypotl(long double x, long double y) { - static const double tobig = HEX_DBL( +, 1, 0, +, 511 ); - static const double big = HEX_DBL( +, 1, 0, +, 513 ); - static const double rbig = HEX_DBL( +, 1, 0, -, 513 ); - static const double tosmall = HEX_DBL( +, 1, 0, -, 511 ); - static const double smalll = HEX_DBL( +, 1, 0, -, 607 ); - static const double rsmall = HEX_DBL( +, 1, 0, +, 607 ); + static const double tobig = HEX_DBL(+, 1, 0, +, 511); + static const double big = HEX_DBL(+, 1, 0, +, 513); + static const double rbig = HEX_DBL(+, 1, 0, -, 513); + static const double tosmall = HEX_DBL(+, 1, 0, -, 511); + static const double smalll = HEX_DBL(+, 1, 0, -, 607); + static const double rsmall = HEX_DBL(+, 1, 0, +, 607); long double max, min; - if( isinf(x) || isinf(y) ) - return INFINITY; + if (isinf(x) || isinf(y)) return INFINITY; - if( isnan(x) || isnan(y) ) - return x + y; + if (isnan(x) || isnan(y)) return x + y; x = reference_fabsl(x); y = reference_fabsl(y); - max = reference_fmaxl( x, y ); - min = reference_fminl( x, y ); + max = reference_fmaxl(x, y); + min = reference_fminl(x, y); - if( max > tobig ) + if (max > tobig) { max *= rbig; min *= rbig; - return big * sqrtl( max * max + min * min ); + return big * sqrtl(max * max + min * min); } - if( max < tosmall ) + if (max < tosmall) { max *= rsmall; min *= rsmall; - return smalll * sqrtl( max * max + min * min ); + return smalll * sqrtl(max * max + min * min); } - return sqrtl( x * x + y * y ); + return sqrtl(x * x + y * y); } -//long double reference_log2l( long double x ) -//{ -// return log( x ) * 1.44269504088896340735992468100189214L; -//} - -long double reference_log2l( long double x ) +long double 
reference_log2l(long double x) { - if( isnan(x) || x < 0.0 || x == -INFINITY) - return NAN; + if (isnan(x) || x < 0.0 || x == -INFINITY) return NAN; - if( x == 0.0f) - return -INFINITY; + if (x == 0.0f) return -INFINITY; - if( x == INFINITY ) - return INFINITY; + if (x == INFINITY) return INFINITY; double hi, lo; - __log2_ep( &hi, &lo, x); + __log2_ep(&hi, &lo, x); - return (long double) hi + (long double) lo; + return (long double)hi + (long double)lo; } -long double reference_log1pl( long double x) +long double reference_log1pl(long double x) { -#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER ) - //unimplemented +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // unimplemented return x; #elif defined(__PPC__) // log1pl on PPC inadvertantly returns NaN for very large values. Work @@ -3434,23 +3678,24 @@ long double reference_log1pl( long double x) #endif } -long double reference_logbl( long double x ) +long double reference_logbl(long double x) { // Since we are just using this to verify double precision, we can // use the double precision copysign here - union { double f; cl_ulong u;} u; - u.f = (double) x; + union { + double f; + cl_ulong u; + } u; + u.f = (double)x; cl_int exponent = (cl_uint)(u.u >> 52) & 0x7ff; - if( exponent == 0x7ff ) - return x * x; + if (exponent == 0x7ff) return x * x; - if( exponent == 0 ) - { // deal with denormals - u.f = x * HEX_DBL( +, 1, 0, +, 64 ); + if (exponent == 0) + { // deal with denormals + u.f = x * HEX_DBL(+, 1, 0, +, 64); exponent = (cl_int)(u.u >> 52) & 0x7ff; - if( exponent == 0 ) - return -INFINITY; + if (exponent == 0) return -INFINITY; return exponent - (1023 + 64); } @@ -3458,84 +3703,82 @@ long double reference_logbl( long double x ) return exponent - 1023; } -long double reference_maxmagl( long double x, long double y ) +long double reference_maxmagl(long double x, long double y) { long double fabsx = fabsl(x); long double fabsy = fabsl(y); - if( fabsx < fabsy ) - return y; + if (fabsx < fabsy) 
return y; - if( fabsy < fabsx ) - return x; + if (fabsy < fabsx) return x; return reference_fmaxl(x, y); } -long double reference_minmagl( long double x, long double y ) +long double reference_minmagl(long double x, long double y) { long double fabsx = fabsl(x); long double fabsy = fabsl(y); - if( fabsx > fabsy ) - return y; + if (fabsx > fabsy) return y; - if( fabsy > fabsx ) - return x; + if (fabsy > fabsx) return x; return reference_fminl(x, y); } -long double reference_nanl( cl_ulong x ) +long double reference_nanl(cl_ulong x) { - union{ cl_ulong u; cl_double f; }u; + union { + cl_ulong u; + cl_double f; + } u; u.u = x | 0x7ff8000000000000ULL; - return (long double) u.f; + return (long double)u.f; } -long double reference_reciprocall( long double x ) -{ - return 1.0L / x; -} +long double reference_reciprocall(long double x) { return 1.0L / x; } -long double reference_remainderl( long double x, long double y ); -long double reference_remainderl( long double x, long double y ) +long double reference_remainderl(long double x, long double y) { int i; - return reference_remquol( x, y, &i ); + return reference_remquol(x, y, &i); } -long double reference_lgammal( long double x); -long double reference_lgammal( long double x) +long double reference_lgammal(long double x) { // lgamma is currently not tested - return reference_lgamma( x ); -} - -static uint32_t two_over_pi[] = { 0x0, 0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5, 0x1993c439, 0x107f945, 0x23abdebb, 0x31586dc9, -0x6e3a424, 0x374b8019, 0x92eea09, 0x3464873f, 0x21deb1cb, 0x4a69cfb, 0x288235f5, 0xbaed121, 0xe99c702, 0x1ad17df9, -0x13991d6, 0xe60d4ce, 0x1f49c845, 0x3e2ef7e4, 0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d, 0xd9fb3c9, -0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7, 0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c, -0x3046fc7b, 0x1aeafc33, 0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102, 0xdffd880, 0x135cc9cc, -0x10606155 + 
return reference_lgamma(x); +} + +static uint32_t two_over_pi[] = { + 0x0, 0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5, + 0x1993c439, 0x107f945, 0x23abdebb, 0x31586dc9, 0x6e3a424, 0x374b8019, + 0x92eea09, 0x3464873f, 0x21deb1cb, 0x4a69cfb, 0x288235f5, 0xbaed121, + 0xe99c702, 0x1ad17df9, 0x13991d6, 0xe60d4ce, 0x1f49c845, 0x3e2ef7e4, + 0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d, 0xd9fb3c9, + 0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7, + 0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c, 0x3046fc7b, 0x1aeafc33, + 0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102, + 0xdffd880, 0x135cc9cc, 0x10606155 }; -static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 }; +static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, + 0x26263314, 0x1701b839, 0x28948127 }; -typedef union - { - uint64_t u; - double d; - }d_ui64_t; +union d_ui64_t { + uint64_t u; + double d; +}; // radix or base of representation #define RADIX (30) #define DIGITS 6 -d_ui64_t two_pow_pradix = { (uint64_t) (1023 + RADIX) << 52 }; -d_ui64_t two_pow_mradix = { (uint64_t) (1023 - RADIX) << 52 }; -d_ui64_t two_pow_two_mradix = { (uint64_t) (1023-2*RADIX) << 52 }; +d_ui64_t two_pow_pradix = { (uint64_t)(1023 + RADIX) << 52 }; +d_ui64_t two_pow_mradix = { (uint64_t)(1023 - RADIX) << 52 }; +d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 }; #define tp_pradix two_pow_pradix.d #define tp_mradix two_pow_mradix.d @@ -3543,28 +3786,29 @@ d_ui64_t two_pow_two_mradix = { (uint64_t) (1023-2*RADIX) << 52 }; // extended fixed point representation of double precision // floating point number. 
// x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ] -typedef struct - { - uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in base_30 - int index; // exponent bias - int sign; // sign of double - }eprep_t; - -static eprep_t double_to_eprep(double x); +struct eprep_t +{ + uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in + // base_30 + int index; // exponent bias + int sign; // sign of double +}; static eprep_t double_to_eprep(double x) { eprep_t result; - result.sign = (signbit( x ) == 0) ? 1 : -1; - x = fabs( x ); + result.sign = (signbit(x) == 0) ? 1 : -1; + x = fabs(x); int index = 0; - while( x > tp_pradix ) { + while (x > tp_pradix) + { index++; x *= tp_mradix; } - while( x < 1 ) { + while (x < 1) + { index--; x *= tp_pradix; } @@ -3572,190 +3816,125 @@ static eprep_t double_to_eprep(double x) result.index = index; int i = 0; result.X[0] = result.X[1] = result.X[2] = 0; - while( x != 0.0 ) { - result.X[i] = (uint32_t) x; - x = (x - (double) result.X[i]) * tp_pradix; + while (x != 0.0) + { + result.X[i] = (uint32_t)x; + x = (x - (double)result.X[i]) * tp_pradix; i++; } return result; } -/* - double eprep_to_double( uint32_t *R, int digits, int index, int sgn ) - { - d_ui64_t nb, rndcorr; - uint64_t lowpart, roundbits, t1; - int expo, expofinal, shift; - double res; - - nb.d = (double) R[0]; - - t1 = R[1]; - lowpart = (t1 << RADIX) + R[2]; - expo = ((nb.u & 0x7ff0000000000000ULL) >> 52) - 1023; - - expofinal = expo + RADIX*index; - - if (expofinal > 1023) { - d_ui64_t inf = { 0x7ff0000000000000ULL }; - res = inf.d; - } - - else if (expofinal >= -1022){ - shift = expo + 2*RADIX - 53; - roundbits = lowpart << (64-shift); - lowpart = lowpart >> shift; - if (lowpart & 0x0000000000000001ULL) { - if(roundbits == 0) { - int i; - for (i=3; i < digits; i++) - roundbits = roundbits | R[i]; - } - if(roundbits == 0) { - if (lowpart & 0x0000000000000002ULL) - rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52; - 
else - rndcorr.d = 0.0; - } - else - rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52; - } - else{ - rndcorr.d = 0.0; - } - - lowpart = lowpart >> 1; - nb.u = nb.u | lowpart; - res = nb.d + rndcorr.d; - - if(index*RADIX + 1023 > 0) { - nb.u = 0; - nb.u = (uint64_t) (index*RADIX + 1023) << 52; - res *= nb.d; - } - else { - nb.u = 0; - nb.u = (uint64_t) (index*RADIX + 1023 + 2*RADIX) << 52; - res *= two_pow_two_mradix.d; - res *= nb.d; - } - } - else { - if (expofinal < -1022 - 53 ) { - res = 0.0; - } - else { - lowpart = lowpart >> (expo + (2*RADIX) - 52); - nb.u = nb.u | lowpart; - nb.u = (nb.u & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL; - nb.u = nb.u >> (-1023 - expofinal); - if(nb.u & 0x0000000000000001ULL) - rndcorr.u = 1; - else - rndcorr.d = 0.0; - res = 0.5*(nb.d + rndcorr.d); - } - } - - return sgn*res; - } - */ -static double eprep_to_double( eprep_t epx ); - -static double eprep_to_double( eprep_t epx ) +static double eprep_to_double(eprep_t epx) { double res = 0.0; - res += ldexp((double) epx.X[0], (epx.index - 0)*RADIX); - res += ldexp((double) epx.X[1], (epx.index - 1)*RADIX); - res += ldexp((double) epx.X[2], (epx.index - 2)*RADIX); + res += ldexp((double)epx.X[0], (epx.index - 0) * RADIX); + res += ldexp((double)epx.X[1], (epx.index - 1) * RADIX); + res += ldexp((double)epx.X[2], (epx.index - 2) * RADIX); return copysign(res, epx.sign); } -static int payne_hanek( double *y, int *exception ); - -static int payne_hanek( double *y, int *exception ) +static int payne_hanek(double *y, int *exception) { double x = *y; // exception cases .. no reduction required - if( isnan( x ) || isinf( x ) || (fabs( x ) <= M_PI_4) ) { + if (isnan(x) || isinf(x) || (fabs(x) <= M_PI_4)) + { *exception = 1; return 0; } *exception = 0; - // After computation result[0] contains integer part while result[1]....result[DIGITS-1] - // contain fractional part. So we are doing computation with (DIGITS-1)*RADIX precision. 
- // Default DIGITS=6 and RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm - // shows that a double precision x, closest to pi/2 is 6381956970095103 x 2^797 which can - // cause 61 digits of cancellation in computation of f = x*2/pi - floor(x*2/pi) ... thus we need - // at least 114 bits (61 leading zeros + 53 bits of mentissa of f) of precision to accurately compute - // f in double precision. Since we are using 150 bits (still an overkill), we should be safe. Extra - // bits can act as guard bits for correct rounding. - uint64_t result[DIGITS+2]; + // After computation result[0] contains integer part while + // result[1]....result[DIGITS-1] contain fractional part. So we are doing + // computation with (DIGITS-1)*RADIX precision. Default DIGITS=6 and + // RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm shows + // that a double precision x, closest to pi/2 is 6381956970095103 x 2^797 + // which can cause 61 digits of cancellation in computation of f = x*2/pi - + // floor(x*2/pi) ... thus we need at least 114 bits (61 leading zeros + 53 + // bits of mentissa of f) of precision to accurately compute f in double + // precision. Since we are using 150 bits (still an overkill), we should be + // safe. Extra bits can act as guard bits for correct rounding. + uint64_t result[DIGITS + 2]; // compute extended precision representation of x - eprep_t epx = double_to_eprep( x ); + eprep_t epx = double_to_eprep(x); int index = epx.index; int i, j; - // extended precision multiplication of 2/pi*x .... we will loose at max two RADIX=30 bit digits in - // the worst case - for(i = 0; i < (DIGITS+2); i++) { + // extended precision multiplication of 2/pi*x .... we will loose at max two + // RADIX=30 bit digits in the worst case + for (i = 0; i < (DIGITS + 2); i++) + { result[i] = 0; - result[i] += ((index + i - 0) >= 0) ? ((uint64_t) two_over_pi[index + i - 0] * (uint64_t) epx.X[0]) : 0; - result[i] += ((index + i - 1) >= 0) ? 
((uint64_t) two_over_pi[index + i - 1] * (uint64_t) epx.X[1]) : 0; - result[i] += ((index + i - 2) >= 0) ? ((uint64_t) two_over_pi[index + i - 2] * (uint64_t) epx.X[2]) : 0; + result[i] += ((index + i - 0) >= 0) + ? ((uint64_t)two_over_pi[index + i - 0] * (uint64_t)epx.X[0]) + : 0; + result[i] += ((index + i - 1) >= 0) + ? ((uint64_t)two_over_pi[index + i - 1] * (uint64_t)epx.X[1]) + : 0; + result[i] += ((index + i - 2) >= 0) + ? ((uint64_t)two_over_pi[index + i - 2] * (uint64_t)epx.X[2]) + : 0; } // Carry propagation. uint64_t tmp; - for(i = DIGITS+2-1; i > 0; i--) { + for (i = DIGITS + 2 - 1; i > 0; i--) + { tmp = result[i] >> RADIX; result[i - 1] += tmp; result[i] -= (tmp << RADIX); } - // we dont ned to normalize the integer part since only last two bits of this will be used - // subsequently algorithm which remain unaltered by this normalization. - // tmp = result[0] >> RADIX; - // result[0] -= (tmp << RADIX); - unsigned int N = (unsigned int) result[0]; + // we dont ned to normalize the integer part since only last two bits of + // this will be used subsequently algorithm which remain unaltered by this + // normalization. tmp = result[0] >> RADIX; result[0] -= (tmp << RADIX); + unsigned int N = (unsigned int)result[0]; - // if the result is > pi/4, bring it to (-pi/4, pi/4] range. Note that testing if the final - // x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to testing, at this stage, if r[1] (the first fractional - // digit) is greater than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to mentioned - // range is equivalent to substracting fractional part at this stage from one and changing the sign. + // if the result is > pi/4, bring it to (-pi/4, pi/4] range. 
Note that + // testing if the final x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to + // testing, at this stage, if r[1] (the first fractional digit) is greater + // than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to + // mentioned range is equivalent to substracting fractional part at this + // stage from one and changing the sign. int sign = 1; - if(result[1] > (uint64_t)(1 << (RADIX - 1))) { - for(i = 1; i < (DIGITS + 2); i++) + if (result[1] > (uint64_t)(1 << (RADIX - 1))) + { + for (i = 1; i < (DIGITS + 2); i++) result[i] = (~((unsigned int)result[i]) & 0x3fffffff); N += 1; sign = -1; } - // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in the worst case - // (when x is multiple of 2/pi very close to an integer) so we need to get rid of these zeros - // and adjust the index of final result. So in the worst case, precision of comupted result is - // 90 bits (150 bits original bits - 60 lost in cancellation). + // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in + // the worst case (when x is multiple of 2/pi very close to an integer) so + // we need to get rid of these zeros and adjust the index of final result. + // So in the worst case, precision of comupted result is 90 bits (150 bits + // original bits - 60 lost in cancellation). 
int ind = 1; - for(i = 1; i < (DIGITS+2); i++) { - if(result[i] != 0) + for (i = 1; i < (DIGITS + 2); i++) + { + if (result[i] != 0) break; else ind++; } - uint64_t r[DIGITS-1]; - for(i = 0; i < (DIGITS-1); i++) { + uint64_t r[DIGITS - 1]; + for (i = 0; i < (DIGITS - 1); i++) + { r[i] = 0; - for(j = 0; j <= i; j++) { - r[i] += (result[ind+i-j] * (uint64_t) pi_over_two[j]); + for (j = 0; j <= i; j++) + { + r[i] += (result[ind + i - j] * (uint64_t)pi_over_two[j]); } } - for(i = (DIGITS-2); i > 0; i--) { + for (i = (DIGITS - 2); i > 0; i--) + { tmp = r[i] >> RADIX; r[i - 1] += tmp; r[i] -= (tmp << RADIX); @@ -3764,147 +3943,127 @@ static int payne_hanek( double *y, int *exception ) r[0] -= (tmp << RADIX); eprep_t epr; - epr.sign = epx.sign*sign; - if(tmp != 0) { + epr.sign = epx.sign * sign; + if (tmp != 0) + { epr.index = -ind + 1; - epr.X[0] = (uint32_t) tmp; - epr.X[1] = (uint32_t) r[0]; - epr.X[2] = (uint32_t) r[1]; + epr.X[0] = (uint32_t)tmp; + epr.X[1] = (uint32_t)r[0]; + epr.X[2] = (uint32_t)r[1]; } - else { + else + { epr.index = -ind; - epr.X[0] = (uint32_t) r[0]; - epr.X[1] = (uint32_t) r[1]; - epr.X[2] = (uint32_t) r[2]; + epr.X[0] = (uint32_t)r[0]; + epr.X[1] = (uint32_t)r[1]; + epr.X[2] = (uint32_t)r[2]; } - *y = eprep_to_double( epr ); - return epx.sign*N; + *y = eprep_to_double(epr); + return epx.sign * N; } double reference_relaxed_cos(double x) { - if(isnan(x)) - return NAN; - return (float)cos((float)x); + if (isnan(x)) return NAN; + return (float)cos((float)x); } double reference_cos(double x) { int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return cos( x ); + int N = payne_hanek(&x, &exception); + if (exception) return cos(x); unsigned int c = N & 3; - switch ( c ) { - case 0: - return cos( x ); - case 1: - return -sin( x ); - case 2: - return -cos( x ); - case 3: - return sin( x ); + switch (c) + { + case 0: return cos(x); + case 1: return -sin(x); + case 2: return -cos(x); + case 3: return sin(x); } return 0.0; } 
-double reference_relaxed_sin(double x){ - return (float)sin((float)x); -} +double reference_relaxed_sin(double x) { return (float)sin((float)x); } double reference_sin(double x) { int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return sin( x ); + int N = payne_hanek(&x, &exception); + if (exception) return sin(x); int c = N & 3; - switch ( c ) { - case 0: - return sin( x ); - case 1: - return cos( x ); - case 2: - return -sin( x ); - case 3: - return -cos( x ); + switch (c) + { + case 0: return sin(x); + case 1: return cos(x); + case 2: return -sin(x); + case 3: return -cos(x); } return 0.0; } -double reference_relaxed_sincos(double x, double * y){ - *y = reference_relaxed_cos(x); - return reference_relaxed_sin(x); +double reference_relaxed_sincos(double x, double *y) +{ + *y = reference_relaxed_cos(x); + return reference_relaxed_sin(x); } double reference_sincos(double x, double *y) { int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) { - *y = cos( x ); - return sin( x ); + int N = payne_hanek(&x, &exception); + if (exception) + { + *y = cos(x); + return sin(x); } int c = N & 3; - switch ( c ) { - case 0: - *y = cos( x ); - return sin( x ); - case 1: - *y = -sin( x ); - return cos( x ); - case 2: - *y = -cos( x ); - return -sin( x ); - case 3: - *y = sin( x ); - return -cos( x ); + switch (c) + { + case 0: *y = cos(x); return sin(x); + case 1: *y = -sin(x); return cos(x); + case 2: *y = -cos(x); return -sin(x); + case 3: *y = sin(x); return -cos(x); } return 0.0; } -double reference_relaxed_tan(double x){ - return ((float) reference_relaxed_sin((float)x))/((float) reference_relaxed_cos((float)x)); +double reference_relaxed_tan(double x) +{ + return ((float)reference_relaxed_sin((float)x)) + / ((float)reference_relaxed_cos((float)x)); } double reference_tan(double x) { int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return tan( x ); + int N = payne_hanek(&x, &exception); + if 
(exception) return tan(x); int c = N & 3; - switch ( c ) { - case 0: - return tan( x ); - case 1: - return -1.0 / tan( x ); - case 2: - return tan( x ); - case 3: - return -1.0 / tan( x ); + switch (c) + { + case 0: return tan(x); + case 1: return -1.0 / tan(x); + case 2: return tan(x); + case 3: return -1.0 / tan(x); } return 0.0; } long double reference_cosl(long double xx) { - double x = (double) xx; + double x = (double)xx; int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return cosl( x ); + int N = payne_hanek(&x, &exception); + if (exception) return cosl(x); unsigned int c = N & 3; - switch ( c ) { - case 0: - return cosl( x ); - case 1: - return -sinl( x ); - case 2: - return -cosl( x ); - case 3: - return sinl( x ); + switch (c) + { + case 0: return cosl(x); + case 1: return -sinl(x); + case 2: return -cosl(x); + case 3: return sinl(x); } return 0.0; } @@ -3913,25 +4072,20 @@ long double reference_sinl(long double xx) { // we use system tanl after reduction which // can flush denorm input to zero so - //take care of it here. - if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 )) - return xx; + // take care of it here. + if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx; - double x = (double) xx; + double x = (double)xx; int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return sinl( x ); + int N = payne_hanek(&x, &exception); + if (exception) return sinl(x); int c = N & 3; - switch ( c ) { - case 0: - return sinl( x ); - case 1: - return cosl( x ); - case 2: - return -sinl( x ); - case 3: - return -cosl( x ); + switch (c) + { + case 0: return sinl(x); + case 1: return cosl(x); + case 2: return -sinl(x); + case 3: return -cosl(x); } return 0.0; } @@ -3940,34 +4094,28 @@ long double reference_sincosl(long double xx, long double *y) { // we use system tanl after reduction which // can flush denorm input to zero so - //take care of it here. 
- if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 )) + // take care of it here. + if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) { *y = cosl(xx); return xx; } - double x = (double) xx; + double x = (double)xx; int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) { - *y = cosl( x ); - return sinl( x ); + int N = payne_hanek(&x, &exception); + if (exception) + { + *y = cosl(x); + return sinl(x); } int c = N & 3; - switch ( c ) { - case 0: - *y = cosl( x ); - return sinl( x ); - case 1: - *y = -sinl( x ); - return cosl( x ); - case 2: - *y = -cosl( x ); - return -sinl( x ); - case 3: - *y = sinl( x ); - return -cosl( x ); + switch (c) + { + case 0: *y = cosl(x); return sinl(x); + case 1: *y = -sinl(x); return cosl(x); + case 2: *y = -cosl(x); return -sinl(x); + case 3: *y = sinl(x); return -cosl(x); } return 0.0; } @@ -3976,205 +4124,337 @@ long double reference_tanl(long double xx) { // we use system tanl after reduction which // can flush denorm input to zero so - //take care of it here. - if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 )) - return xx; + // take care of it here. 
+ if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx; - double x = (double) xx; + double x = (double)xx; int exception; - int N = payne_hanek( &x, &exception ); - if( exception ) - return tanl( x ); + int N = payne_hanek(&x, &exception); + if (exception) return tanl(x); int c = N & 3; - switch ( c ) { - case 0: - return tanl( x ); - case 1: - return -1.0 / tanl( x ); - case 2: - return tanl( x ); - case 3: - return -1.0 / tanl( x ); + switch (c) + { + case 0: return tanl(x); + case 1: return -1.0 / tanl(x); + case 2: return tanl(x); + case 3: return -1.0 / tanl(x); } return 0.0; } static double __loglTable1[64][3] = { -{HEX_DBL( +, 1, 5390948f40fea, +, 0 ), HEX_DBL( -, 1, a152f142a, -, 2 ), HEX_DBL( +, 1, f93e27b43bd2c, -, 40 )}, -{HEX_DBL( +, 1, 5015015015015, +, 0 ), HEX_DBL( -, 1, 921800925, -, 2 ), HEX_DBL( +, 1, 162432a1b8df7, -, 41 )}, -{HEX_DBL( +, 1, 4cab88725af6e, +, 0 ), HEX_DBL( -, 1, 8304d90c18, -, 2 ), HEX_DBL( +, 1, 80bb749056fe7, -, 40 )}, -{HEX_DBL( +, 1, 49539e3b2d066, +, 0 ), HEX_DBL( -, 1, 7418acebc, -, 2 ), HEX_DBL( +, 1, ceac7f0607711, -, 43 )}, -{HEX_DBL( +, 1, 460cbc7f5cf9a, +, 0 ), HEX_DBL( -, 1, 6552b49988, -, 2 ), HEX_DBL( +, 1, d8913d0e89fa, -, 42 )}, -{HEX_DBL( +, 1, 42d6625d51f86, +, 0 ), HEX_DBL( -, 1, 56b22e6b58, -, 2 ), HEX_DBL( +, 1, c7eaf515033a1, -, 44 )}, -{HEX_DBL( +, 1, 3fb013fb013fb, +, 0 ), HEX_DBL( -, 1, 48365e696, -, 2 ), HEX_DBL( +, 1, 434adcde7edc7, -, 41 )}, -{HEX_DBL( +, 1, 3c995a47babe7, +, 0 ), HEX_DBL( -, 1, 39de8e156, -, 2 ), HEX_DBL( +, 1, 8246f8e527754, -, 40 )}, -{HEX_DBL( +, 1, 3991c2c187f63, +, 0 ), HEX_DBL( -, 1, 2baa0c34c, -, 2 ), HEX_DBL( +, 1, e1513c28e180d, -, 42 )}, -{HEX_DBL( +, 1, 3698df3de0747, +, 0 ), HEX_DBL( -, 1, 1d982c9d58, -, 2 ), HEX_DBL( +, 1, 63ea3fed4b8a2, -, 40 )}, -{HEX_DBL( +, 1, 33ae45b57bcb1, +, 0 ), HEX_DBL( -, 1, 0fa848045, -, 2 ), HEX_DBL( +, 1, 32ccbacf1779b, -, 40 )}, -{HEX_DBL( +, 1, 30d190130d19, +, 0 ), HEX_DBL( -, 1, 01d9bbcfa8, -, 2 ), HEX_DBL( +, 1, e2bfeb2b884aa, 
-, 42 )}, -{HEX_DBL( +, 1, 2e025c04b8097, +, 0 ), HEX_DBL( -, 1, e857d3d37, -, 3 ), HEX_DBL( +, 1, d9309b4d2ea85, -, 40 )}, -{HEX_DBL( +, 1, 2b404ad012b4, +, 0 ), HEX_DBL( -, 1, cd3c712d4, -, 3 ), HEX_DBL( +, 1, ddf360962d7ab, -, 40 )}, -{HEX_DBL( +, 1, 288b01288b012, +, 0 ), HEX_DBL( -, 1, b2602497e, -, 3 ), HEX_DBL( +, 1, 597f8a121640f, -, 40 )}, -{HEX_DBL( +, 1, 25e22708092f1, +, 0 ), HEX_DBL( -, 1, 97c1cb13d, -, 3 ), HEX_DBL( +, 1, 02807d15580dc, -, 40 )}, -{HEX_DBL( +, 1, 23456789abcdf, +, 0 ), HEX_DBL( -, 1, 7d60496d, -, 3 ), HEX_DBL( +, 1, 12ce913d7a827, -, 41 )}, -{HEX_DBL( +, 1, 20b470c67c0d8, +, 0 ), HEX_DBL( -, 1, 633a8bf44, -, 3 ), HEX_DBL( +, 1, 0648bca9c96bd, -, 40 )}, -{HEX_DBL( +, 1, 1e2ef3b3fb874, +, 0 ), HEX_DBL( -, 1, 494f863b9, -, 3 ), HEX_DBL( +, 1, 066fceb89b0eb, -, 42 )}, -{HEX_DBL( +, 1, 1bb4a4046ed29, +, 0 ), HEX_DBL( -, 1, 2f9e32d5c, -, 3 ), HEX_DBL( +, 1, 17b8b6c4f846b, -, 46 )}, -{HEX_DBL( +, 1, 19453808ca29c, +, 0 ), HEX_DBL( -, 1, 162593187, -, 3 ), HEX_DBL( +, 1, 2c83506452154, -, 42 )}, -{HEX_DBL( +, 1, 16e0689427378, +, 0 ), HEX_DBL( -, 1, f9c95dc1e, -, 4 ), HEX_DBL( +, 1, dd5d2183150f3, -, 41 )}, -{HEX_DBL( +, 1, 1485f0e0acd3b, +, 0 ), HEX_DBL( -, 1, c7b528b72, -, 4 ), HEX_DBL( +, 1, 0e43c4f4e619d, -, 40 )}, -{HEX_DBL( +, 1, 12358e75d3033, +, 0 ), HEX_DBL( -, 1, 960caf9ac, -, 4 ), HEX_DBL( +, 1, 20fbfd5902a1e, -, 42 )}, -{HEX_DBL( +, 1, 0fef010fef01, +, 0 ), HEX_DBL( -, 1, 64ce26c08, -, 4 ), HEX_DBL( +, 1, 8ebeefb4ac467, -, 40 )}, -{HEX_DBL( +, 1, 0db20a88f4695, +, 0 ), HEX_DBL( -, 1, 33f7cde16, -, 4 ), HEX_DBL( +, 1, 30b3312da7a7d, -, 40 )}, -{HEX_DBL( +, 1, 0b7e6ec259dc7, +, 0 ), HEX_DBL( -, 1, 0387efbcc, -, 4 ), HEX_DBL( +, 1, 796f1632949c3, -, 40 )}, -{HEX_DBL( +, 1, 0953f39010953, +, 0 ), HEX_DBL( -, 1, a6f9c378, -, 5 ), HEX_DBL( +, 1, 1687e151172cc, -, 40 )}, -{HEX_DBL( +, 1, 073260a47f7c6, +, 0 ), HEX_DBL( -, 1, 47aa07358, -, 5 ), HEX_DBL( +, 1, 1f87e4a9cc778, -, 42 )}, -{HEX_DBL( +, 1, 05197f7d73404, +, 0 ), HEX_DBL( -, 1, 
d23afc498, -, 6 ), HEX_DBL( +, 1, b183a6b628487, -, 40 )}, -{HEX_DBL( +, 1, 03091b51f5e1a, +, 0 ), HEX_DBL( -, 1, 16a21e21, -, 6 ), HEX_DBL( +, 1, 7d75c58973ce5, -, 40 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, f44659e4a4271, -, 1 ), HEX_DBL( +, 1, 11cd1d51, -, 5 ), HEX_DBL( +, 1, 9a0d857e2f4b2, -, 40 )}, -{HEX_DBL( +, 1, ecc07b301ecc, -, 1 ), HEX_DBL( +, 1, c4dfab908, -, 5 ), HEX_DBL( +, 1, 55b53fce557fd, -, 40 )}, -{HEX_DBL( +, 1, e573ac901e573, -, 1 ), HEX_DBL( +, 1, 3aa2fdd26, -, 4 ), HEX_DBL( +, 1, f1cb0c9532089, -, 40 )}, -{HEX_DBL( +, 1, de5d6e3f8868a, -, 1 ), HEX_DBL( +, 1, 918a16e46, -, 4 ), HEX_DBL( +, 1, 9af0dcd65a6e1, -, 43 )}, -{HEX_DBL( +, 1, d77b654b82c33, -, 1 ), HEX_DBL( +, 1, e72ec117e, -, 4 ), HEX_DBL( +, 1, a5b93c4ebe124, -, 40 )}, -{HEX_DBL( +, 1, d0cb58f6ec074, -, 1 ), HEX_DBL( +, 1, 1dcd19755, -, 3 ), HEX_DBL( +, 1, 5be50e71ddc6c, -, 42 )}, -{HEX_DBL( +, 1, ca4b3055ee191, -, 1 ), HEX_DBL( +, 1, 476a9f983, -, 3 ), HEX_DBL( +, 1, ee9a798719e7f, -, 40 )}, -{HEX_DBL( +, 1, c3f8f01c3f8f, -, 1 ), HEX_DBL( +, 1, 70742d4ef, -, 3 ), HEX_DBL( +, 1, 3ff1352c1219c, -, 46 )}, -{HEX_DBL( +, 1, bdd2b899406f7, -, 1 ), HEX_DBL( +, 1, 98edd077e, -, 3 ), HEX_DBL( +, 1, c383cd11362f4, -, 41 )}, -{HEX_DBL( +, 1, b7d6c3dda338b, -, 1 ), HEX_DBL( +, 1, c0db6cdd9, -, 3 ), HEX_DBL( +, 1, 37bd85b1a824e, -, 41 )}, -{HEX_DBL( +, 1, b2036406c80d9, -, 1 ), HEX_DBL( +, 1, e840be74e, -, 3 ), HEX_DBL( +, 1, a9334d525e1ec, -, 41 )}, -{HEX_DBL( +, 1, ac5701ac5701a, -, 1 ), HEX_DBL( +, 1, 0790adbb, -, 2 ), HEX_DBL( +, 1, 8060bfb6a491, -, 41 )}, -{HEX_DBL( +, 1, a6d01a6d01a6d, -, 1 ), HEX_DBL( +, 1, 1ac05b2918, -, 2 ), HEX_DBL( +, 1, c1c161471580a, -, 40 )}, -{HEX_DBL( +, 1, a16d3f97a4b01, -, 1 ), HEX_DBL( +, 1, 2db10fc4d8, -, 2 ), HEX_DBL( +, 1, ab1aa62214581, -, 42 )}, -{HEX_DBL( +, 1, 9c2d14ee4a101, -, 1 ), HEX_DBL( +, 1, 406463b1b, -, 2 
), HEX_DBL( +, 1, 12e95dbda6611, -, 44 )}, -{HEX_DBL( +, 1, 970e4f80cb872, -, 1 ), HEX_DBL( +, 1, 52dbdfc4c8, -, 2 ), HEX_DBL( +, 1, 6b53fee511af, -, 42 )}, -{HEX_DBL( +, 1, 920fb49d0e228, -, 1 ), HEX_DBL( +, 1, 6518fe467, -, 2 ), HEX_DBL( +, 1, eea7d7d7d1764, -, 40 )}, -{HEX_DBL( +, 1, 8d3018d3018d3, -, 1 ), HEX_DBL( +, 1, 771d2ba7e8, -, 2 ), HEX_DBL( +, 1, ecefa8d4fab97, -, 40 )}, -{HEX_DBL( +, 1, 886e5f0abb049, -, 1 ), HEX_DBL( +, 1, 88e9c72e08, -, 2 ), HEX_DBL( +, 1, 913ea3d33fd14, -, 41 )}, -{HEX_DBL( +, 1, 83c977ab2bedd, -, 1 ), HEX_DBL( +, 1, 9a802391e, -, 2 ), HEX_DBL( +, 1, 197e845877c94, -, 41 )}, -{HEX_DBL( +, 1, 7f405fd017f4, -, 1 ), HEX_DBL( +, 1, abe18797f, -, 2 ), HEX_DBL( +, 1, f4a52f8e8a81, -, 42 )}, -{HEX_DBL( +, 1, 7ad2208e0ecc3, -, 1 ), HEX_DBL( +, 1, bd0f2e9e78, -, 2 ), HEX_DBL( +, 1, 031f4336644cc, -, 42 )}, -{HEX_DBL( +, 1, 767dce434a9b1, -, 1 ), HEX_DBL( +, 1, ce0a4923a, -, 2 ), HEX_DBL( +, 1, 61f33c897020c, -, 40 )}, -{HEX_DBL( +, 1, 724287f46debc, -, 1 ), HEX_DBL( +, 1, ded3fd442, -, 2 ), HEX_DBL( +, 1, b2632e830632, -, 41 )}, -{HEX_DBL( +, 1, 6e1f76b4337c6, -, 1 ), HEX_DBL( +, 1, ef6d673288, -, 2 ), HEX_DBL( +, 1, 888ec245a0bf, -, 40 )}, -{HEX_DBL( +, 1, 6a13cd153729, -, 1 ), HEX_DBL( +, 1, ffd799a838, -, 2 ), HEX_DBL( +, 1, fe6f3b2f5fc8e, -, 40 )}, -{HEX_DBL( +, 1, 661ec6a5122f9, -, 1 ), HEX_DBL( +, 1, 0809cf27f4, -, 1 ), HEX_DBL( +, 1, 81eaa9ef284dd, -, 40 )}, -{HEX_DBL( +, 1, 623fa7701623f, -, 1 ), HEX_DBL( +, 1, 10113b153c, -, 1 ), HEX_DBL( +, 1, 1d7b07d6b1143, -, 42 )}, -{HEX_DBL( +, 1, 5e75bb8d015e7, -, 1 ), HEX_DBL( +, 1, 18028cf728, -, 1 ), HEX_DBL( +, 1, 76b100b1f6c6, -, 41 )}, -{HEX_DBL( +, 1, 5ac056b015ac, -, 1 ), HEX_DBL( +, 1, 1fde3d30e8, -, 1 ), HEX_DBL( +, 1, 26faeb9870945, -, 45 )}, -{HEX_DBL( +, 1, 571ed3c506b39, -, 1 ), HEX_DBL( +, 1, 27a4c0585c, -, 1 ), HEX_DBL( +, 1, 7f2c5344d762b, -, 42 )} + { HEX_DBL(+, 1, 5390948f40fea, +, 0), HEX_DBL(-, 1, a152f142a, -, 2), + HEX_DBL(+, 1, f93e27b43bd2c, -, 40) }, + { HEX_DBL(+, 1, 
5015015015015, +, 0), HEX_DBL(-, 1, 921800925, -, 2), + HEX_DBL(+, 1, 162432a1b8df7, -, 41) }, + { HEX_DBL(+, 1, 4cab88725af6e, +, 0), HEX_DBL(-, 1, 8304d90c18, -, 2), + HEX_DBL(+, 1, 80bb749056fe7, -, 40) }, + { HEX_DBL(+, 1, 49539e3b2d066, +, 0), HEX_DBL(-, 1, 7418acebc, -, 2), + HEX_DBL(+, 1, ceac7f0607711, -, 43) }, + { HEX_DBL(+, 1, 460cbc7f5cf9a, +, 0), HEX_DBL(-, 1, 6552b49988, -, 2), + HEX_DBL(+, 1, d8913d0e89fa, -, 42) }, + { HEX_DBL(+, 1, 42d6625d51f86, +, 0), HEX_DBL(-, 1, 56b22e6b58, -, 2), + HEX_DBL(+, 1, c7eaf515033a1, -, 44) }, + { HEX_DBL(+, 1, 3fb013fb013fb, +, 0), HEX_DBL(-, 1, 48365e696, -, 2), + HEX_DBL(+, 1, 434adcde7edc7, -, 41) }, + { HEX_DBL(+, 1, 3c995a47babe7, +, 0), HEX_DBL(-, 1, 39de8e156, -, 2), + HEX_DBL(+, 1, 8246f8e527754, -, 40) }, + { HEX_DBL(+, 1, 3991c2c187f63, +, 0), HEX_DBL(-, 1, 2baa0c34c, -, 2), + HEX_DBL(+, 1, e1513c28e180d, -, 42) }, + { HEX_DBL(+, 1, 3698df3de0747, +, 0), HEX_DBL(-, 1, 1d982c9d58, -, 2), + HEX_DBL(+, 1, 63ea3fed4b8a2, -, 40) }, + { HEX_DBL(+, 1, 33ae45b57bcb1, +, 0), HEX_DBL(-, 1, 0fa848045, -, 2), + HEX_DBL(+, 1, 32ccbacf1779b, -, 40) }, + { HEX_DBL(+, 1, 30d190130d19, +, 0), HEX_DBL(-, 1, 01d9bbcfa8, -, 2), + HEX_DBL(+, 1, e2bfeb2b884aa, -, 42) }, + { HEX_DBL(+, 1, 2e025c04b8097, +, 0), HEX_DBL(-, 1, e857d3d37, -, 3), + HEX_DBL(+, 1, d9309b4d2ea85, -, 40) }, + { HEX_DBL(+, 1, 2b404ad012b4, +, 0), HEX_DBL(-, 1, cd3c712d4, -, 3), + HEX_DBL(+, 1, ddf360962d7ab, -, 40) }, + { HEX_DBL(+, 1, 288b01288b012, +, 0), HEX_DBL(-, 1, b2602497e, -, 3), + HEX_DBL(+, 1, 597f8a121640f, -, 40) }, + { HEX_DBL(+, 1, 25e22708092f1, +, 0), HEX_DBL(-, 1, 97c1cb13d, -, 3), + HEX_DBL(+, 1, 02807d15580dc, -, 40) }, + { HEX_DBL(+, 1, 23456789abcdf, +, 0), HEX_DBL(-, 1, 7d60496d, -, 3), + HEX_DBL(+, 1, 12ce913d7a827, -, 41) }, + { HEX_DBL(+, 1, 20b470c67c0d8, +, 0), HEX_DBL(-, 1, 633a8bf44, -, 3), + HEX_DBL(+, 1, 0648bca9c96bd, -, 40) }, + { HEX_DBL(+, 1, 1e2ef3b3fb874, +, 0), HEX_DBL(-, 1, 494f863b9, -, 3), + HEX_DBL(+, 1, 
066fceb89b0eb, -, 42) }, + { HEX_DBL(+, 1, 1bb4a4046ed29, +, 0), HEX_DBL(-, 1, 2f9e32d5c, -, 3), + HEX_DBL(+, 1, 17b8b6c4f846b, -, 46) }, + { HEX_DBL(+, 1, 19453808ca29c, +, 0), HEX_DBL(-, 1, 162593187, -, 3), + HEX_DBL(+, 1, 2c83506452154, -, 42) }, + { HEX_DBL(+, 1, 16e0689427378, +, 0), HEX_DBL(-, 1, f9c95dc1e, -, 4), + HEX_DBL(+, 1, dd5d2183150f3, -, 41) }, + { HEX_DBL(+, 1, 1485f0e0acd3b, +, 0), HEX_DBL(-, 1, c7b528b72, -, 4), + HEX_DBL(+, 1, 0e43c4f4e619d, -, 40) }, + { HEX_DBL(+, 1, 12358e75d3033, +, 0), HEX_DBL(-, 1, 960caf9ac, -, 4), + HEX_DBL(+, 1, 20fbfd5902a1e, -, 42) }, + { HEX_DBL(+, 1, 0fef010fef01, +, 0), HEX_DBL(-, 1, 64ce26c08, -, 4), + HEX_DBL(+, 1, 8ebeefb4ac467, -, 40) }, + { HEX_DBL(+, 1, 0db20a88f4695, +, 0), HEX_DBL(-, 1, 33f7cde16, -, 4), + HEX_DBL(+, 1, 30b3312da7a7d, -, 40) }, + { HEX_DBL(+, 1, 0b7e6ec259dc7, +, 0), HEX_DBL(-, 1, 0387efbcc, -, 4), + HEX_DBL(+, 1, 796f1632949c3, -, 40) }, + { HEX_DBL(+, 1, 0953f39010953, +, 0), HEX_DBL(-, 1, a6f9c378, -, 5), + HEX_DBL(+, 1, 1687e151172cc, -, 40) }, + { HEX_DBL(+, 1, 073260a47f7c6, +, 0), HEX_DBL(-, 1, 47aa07358, -, 5), + HEX_DBL(+, 1, 1f87e4a9cc778, -, 42) }, + { HEX_DBL(+, 1, 05197f7d73404, +, 0), HEX_DBL(-, 1, d23afc498, -, 6), + HEX_DBL(+, 1, b183a6b628487, -, 40) }, + { HEX_DBL(+, 1, 03091b51f5e1a, +, 0), HEX_DBL(-, 1, 16a21e21, -, 6), + HEX_DBL(+, 1, 7d75c58973ce5, -, 40) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, f44659e4a4271, -, 1), HEX_DBL(+, 1, 11cd1d51, -, 5), + HEX_DBL(+, 1, 9a0d857e2f4b2, -, 40) }, + { HEX_DBL(+, 1, ecc07b301ecc, -, 1), HEX_DBL(+, 1, c4dfab908, -, 5), + HEX_DBL(+, 1, 55b53fce557fd, -, 40) }, + { HEX_DBL(+, 1, e573ac901e573, -, 1), HEX_DBL(+, 1, 3aa2fdd26, -, 4), + HEX_DBL(+, 1, f1cb0c9532089, -, 40) }, + { HEX_DBL(+, 1, de5d6e3f8868a, -, 1), HEX_DBL(+, 1, 918a16e46, -, 4), + HEX_DBL(+, 1, 9af0dcd65a6e1, -, 43) }, + { HEX_DBL(+, 
1, d77b654b82c33, -, 1), HEX_DBL(+, 1, e72ec117e, -, 4), + HEX_DBL(+, 1, a5b93c4ebe124, -, 40) }, + { HEX_DBL(+, 1, d0cb58f6ec074, -, 1), HEX_DBL(+, 1, 1dcd19755, -, 3), + HEX_DBL(+, 1, 5be50e71ddc6c, -, 42) }, + { HEX_DBL(+, 1, ca4b3055ee191, -, 1), HEX_DBL(+, 1, 476a9f983, -, 3), + HEX_DBL(+, 1, ee9a798719e7f, -, 40) }, + { HEX_DBL(+, 1, c3f8f01c3f8f, -, 1), HEX_DBL(+, 1, 70742d4ef, -, 3), + HEX_DBL(+, 1, 3ff1352c1219c, -, 46) }, + { HEX_DBL(+, 1, bdd2b899406f7, -, 1), HEX_DBL(+, 1, 98edd077e, -, 3), + HEX_DBL(+, 1, c383cd11362f4, -, 41) }, + { HEX_DBL(+, 1, b7d6c3dda338b, -, 1), HEX_DBL(+, 1, c0db6cdd9, -, 3), + HEX_DBL(+, 1, 37bd85b1a824e, -, 41) }, + { HEX_DBL(+, 1, b2036406c80d9, -, 1), HEX_DBL(+, 1, e840be74e, -, 3), + HEX_DBL(+, 1, a9334d525e1ec, -, 41) }, + { HEX_DBL(+, 1, ac5701ac5701a, -, 1), HEX_DBL(+, 1, 0790adbb, -, 2), + HEX_DBL(+, 1, 8060bfb6a491, -, 41) }, + { HEX_DBL(+, 1, a6d01a6d01a6d, -, 1), HEX_DBL(+, 1, 1ac05b2918, -, 2), + HEX_DBL(+, 1, c1c161471580a, -, 40) }, + { HEX_DBL(+, 1, a16d3f97a4b01, -, 1), HEX_DBL(+, 1, 2db10fc4d8, -, 2), + HEX_DBL(+, 1, ab1aa62214581, -, 42) }, + { HEX_DBL(+, 1, 9c2d14ee4a101, -, 1), HEX_DBL(+, 1, 406463b1b, -, 2), + HEX_DBL(+, 1, 12e95dbda6611, -, 44) }, + { HEX_DBL(+, 1, 970e4f80cb872, -, 1), HEX_DBL(+, 1, 52dbdfc4c8, -, 2), + HEX_DBL(+, 1, 6b53fee511af, -, 42) }, + { HEX_DBL(+, 1, 920fb49d0e228, -, 1), HEX_DBL(+, 1, 6518fe467, -, 2), + HEX_DBL(+, 1, eea7d7d7d1764, -, 40) }, + { HEX_DBL(+, 1, 8d3018d3018d3, -, 1), HEX_DBL(+, 1, 771d2ba7e8, -, 2), + HEX_DBL(+, 1, ecefa8d4fab97, -, 40) }, + { HEX_DBL(+, 1, 886e5f0abb049, -, 1), HEX_DBL(+, 1, 88e9c72e08, -, 2), + HEX_DBL(+, 1, 913ea3d33fd14, -, 41) }, + { HEX_DBL(+, 1, 83c977ab2bedd, -, 1), HEX_DBL(+, 1, 9a802391e, -, 2), + HEX_DBL(+, 1, 197e845877c94, -, 41) }, + { HEX_DBL(+, 1, 7f405fd017f4, -, 1), HEX_DBL(+, 1, abe18797f, -, 2), + HEX_DBL(+, 1, f4a52f8e8a81, -, 42) }, + { HEX_DBL(+, 1, 7ad2208e0ecc3, -, 1), HEX_DBL(+, 1, bd0f2e9e78, -, 2), + HEX_DBL(+, 1, 
031f4336644cc, -, 42) }, + { HEX_DBL(+, 1, 767dce434a9b1, -, 1), HEX_DBL(+, 1, ce0a4923a, -, 2), + HEX_DBL(+, 1, 61f33c897020c, -, 40) }, + { HEX_DBL(+, 1, 724287f46debc, -, 1), HEX_DBL(+, 1, ded3fd442, -, 2), + HEX_DBL(+, 1, b2632e830632, -, 41) }, + { HEX_DBL(+, 1, 6e1f76b4337c6, -, 1), HEX_DBL(+, 1, ef6d673288, -, 2), + HEX_DBL(+, 1, 888ec245a0bf, -, 40) }, + { HEX_DBL(+, 1, 6a13cd153729, -, 1), HEX_DBL(+, 1, ffd799a838, -, 2), + HEX_DBL(+, 1, fe6f3b2f5fc8e, -, 40) }, + { HEX_DBL(+, 1, 661ec6a5122f9, -, 1), HEX_DBL(+, 1, 0809cf27f4, -, 1), + HEX_DBL(+, 1, 81eaa9ef284dd, -, 40) }, + { HEX_DBL(+, 1, 623fa7701623f, -, 1), HEX_DBL(+, 1, 10113b153c, -, 1), + HEX_DBL(+, 1, 1d7b07d6b1143, -, 42) }, + { HEX_DBL(+, 1, 5e75bb8d015e7, -, 1), HEX_DBL(+, 1, 18028cf728, -, 1), + HEX_DBL(+, 1, 76b100b1f6c6, -, 41) }, + { HEX_DBL(+, 1, 5ac056b015ac, -, 1), HEX_DBL(+, 1, 1fde3d30e8, -, 1), + HEX_DBL(+, 1, 26faeb9870945, -, 45) }, + { HEX_DBL(+, 1, 571ed3c506b39, -, 1), HEX_DBL(+, 1, 27a4c0585c, -, 1), + HEX_DBL(+, 1, 7f2c5344d762b, -, 42) } }; static double __loglTable2[64][3] = { -{HEX_DBL( +, 1, 01fbe7f0a1be6, +, 0 ), HEX_DBL( -, 1, 6cf6ddd26112a, -, 7 ), HEX_DBL( +, 1, 0725e5755e314, -, 60 )}, -{HEX_DBL( +, 1, 01eba93a97b12, +, 0 ), HEX_DBL( -, 1, 6155b1d99f603, -, 7 ), HEX_DBL( +, 1, 4bcea073117f4, -, 60 )}, -{HEX_DBL( +, 1, 01db6c9029cd1, +, 0 ), HEX_DBL( -, 1, 55b54153137ff, -, 7 ), HEX_DBL( +, 1, 21e8faccad0ec, -, 61 )}, -{HEX_DBL( +, 1, 01cb31f0f534c, +, 0 ), HEX_DBL( -, 1, 4a158c27245bd, -, 7 ), HEX_DBL( +, 1, 1a5b7bfbf35d3, -, 60 )}, -{HEX_DBL( +, 1, 01baf95c9723c, +, 0 ), HEX_DBL( -, 1, 3e76923e3d678, -, 7 ), HEX_DBL( +, 1, eee400eb5fe34, -, 62 )}, -{HEX_DBL( +, 1, 01aac2d2acee6, +, 0 ), HEX_DBL( -, 1, 32d85380ce776, -, 7 ), HEX_DBL( +, 1, cbf7a513937bd, -, 61 )}, -{HEX_DBL( +, 1, 019a8e52d401e, +, 0 ), HEX_DBL( -, 1, 273acfd74be72, -, 7 ), HEX_DBL( +, 1, 5c64599efa5e6, -, 60 )}, -{HEX_DBL( +, 1, 018a5bdca9e42, +, 0 ), HEX_DBL( -, 1, 1b9e072a2e65, -, 7 ), HEX_DBL( +, 
1, 364180e0a5d37, -, 60 )}, -{HEX_DBL( +, 1, 017a2b6fcc33e, +, 0 ), HEX_DBL( -, 1, 1001f961f3243, -, 7 ), HEX_DBL( +, 1, 63d795746f216, -, 60 )}, -{HEX_DBL( +, 1, 0169fd0bd8a8a, +, 0 ), HEX_DBL( -, 1, 0466a6671bca4, -, 7 ), HEX_DBL( +, 1, 4c99ff1907435, -, 60 )}, -{HEX_DBL( +, 1, 0159d0b06d129, +, 0 ), HEX_DBL( -, 1, f1981c445cd05, -, 8 ), HEX_DBL( +, 1, 4bfff6366b723, -, 62 )}, -{HEX_DBL( +, 1, 0149a65d275a6, +, 0 ), HEX_DBL( -, 1, da6460f76ab8c, -, 8 ), HEX_DBL( +, 1, 9c5404f47589c, -, 61 )}, -{HEX_DBL( +, 1, 01397e11a581b, +, 0 ), HEX_DBL( -, 1, c3321ab87f4ef, -, 8 ), HEX_DBL( +, 1, c0da537429cea, -, 61 )}, -{HEX_DBL( +, 1, 012957cd85a28, +, 0 ), HEX_DBL( -, 1, ac014958c112c, -, 8 ), HEX_DBL( +, 1, 000c2a1b595e3, -, 64 )}, -{HEX_DBL( +, 1, 0119339065ef7, +, 0 ), HEX_DBL( -, 1, 94d1eca95f67a, -, 8 ), HEX_DBL( +, 1, d8d20b0564d5, -, 61 )}, -{HEX_DBL( +, 1, 01091159e4b3d, +, 0 ), HEX_DBL( -, 1, 7da4047b92b3e, -, 8 ), HEX_DBL( +, 1, 6194a5d68cf2, -, 66 )}, -{HEX_DBL( +, 1, 00f8f129a0535, +, 0 ), HEX_DBL( -, 1, 667790a09bf77, -, 8 ), HEX_DBL( +, 1, ca230e0bea645, -, 61 )}, -{HEX_DBL( +, 1, 00e8d2ff374a1, +, 0 ), HEX_DBL( -, 1, 4f4c90e9c4ead, -, 8 ), HEX_DBL( +, 1, 1de3e7f350c1, -, 61 )}, -{HEX_DBL( +, 1, 00d8b6da482ce, +, 0 ), HEX_DBL( -, 1, 3823052860649, -, 8 ), HEX_DBL( +, 1, 5789b4c5891b8, -, 64 )}, -{HEX_DBL( +, 1, 00c89cba71a8c, +, 0 ), HEX_DBL( -, 1, 20faed2dc9a9e, -, 8 ), HEX_DBL( +, 1, 9e7c40f9839fd, -, 62 )}, -{HEX_DBL( +, 1, 00b8849f52834, +, 0 ), HEX_DBL( -, 1, 09d448cb65014, -, 8 ), HEX_DBL( +, 1, 387e3e9b6d02, -, 62 )}, -{HEX_DBL( +, 1, 00a86e88899a4, +, 0 ), HEX_DBL( -, 1, e55e2fa53ebf1, -, 9 ), HEX_DBL( +, 1, cdaa71fddfddf, -, 62 )}, -{HEX_DBL( +, 1, 00985a75b5e3f, +, 0 ), HEX_DBL( -, 1, b716b429dce0f, -, 9 ), HEX_DBL( +, 1, 2f2af081367bf, -, 63 )}, -{HEX_DBL( +, 1, 00884866766ee, +, 0 ), HEX_DBL( -, 1, 88d21ec7a16d7, -, 9 ), HEX_DBL( +, 1, fb95c228d6f16, -, 62 )}, -{HEX_DBL( +, 1, 0078385a6a61d, +, 0 ), HEX_DBL( -, 1, 5a906f219a9e8, -, 9 ), HEX_DBL( 
+, 1, 18aff10a89f29, -, 64 )}, -{HEX_DBL( +, 1, 00682a5130fbe, +, 0 ), HEX_DBL( -, 1, 2c51a4dae87f1, -, 9 ), HEX_DBL( +, 1, bcc7e33ddde3, -, 63 )}, -{HEX_DBL( +, 1, 00581e4a69944, +, 0 ), HEX_DBL( -, 1, fc2b7f2d782b1, -, 10 ), HEX_DBL( +, 1, fe3ef3300a9fa, -, 64 )}, -{HEX_DBL( +, 1, 00481445b39a8, +, 0 ), HEX_DBL( -, 1, 9fb97df0b0b83, -, 10 ), HEX_DBL( +, 1, 0d9a601f2f324, -, 65 )}, -{HEX_DBL( +, 1, 00380c42ae963, +, 0 ), HEX_DBL( -, 1, 434d4546227ae, -, 10 ), HEX_DBL( +, 1, 0b9b6a5868f33, -, 63 )}, -{HEX_DBL( +, 1, 00280640fa271, +, 0 ), HEX_DBL( -, 1, cdcda8e930c19, -, 11 ), HEX_DBL( +, 1, 3d424ab39f789, -, 64 )}, -{HEX_DBL( +, 1, 0018024036051, +, 0 ), HEX_DBL( -, 1, 150c558601261, -, 11 ), HEX_DBL( +, 1, 285bb90327a0f, -, 64 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, ffa011fca0a1e, -, 1 ), HEX_DBL( +, 1, 14e5640c4197b, -, 10 ), HEX_DBL( +, 1, 95728136ae401, -, 63 )}, -{HEX_DBL( +, 1, ff6031f064e07, -, 1 ), HEX_DBL( +, 1, cd61806bf532d, -, 10 ), HEX_DBL( +, 1, 568a4f35d8538, -, 63 )}, -{HEX_DBL( +, 1, ff2061d532b9c, -, 1 ), HEX_DBL( +, 1, 42e34af550eda, -, 9 ), HEX_DBL( +, 1, 8f69cee55fec, -, 62 )}, -{HEX_DBL( +, 1, fee0a1a513253, -, 1 ), HEX_DBL( +, 1, 9f0a5523902ea, -, 9 ), HEX_DBL( +, 1, daec734b11615, -, 63 )}, -{HEX_DBL( +, 1, fea0f15a12139, -, 1 ), HEX_DBL( +, 1, fb25e19f11b26, -, 9 ), HEX_DBL( +, 1, 8bafca62941da, -, 62 )}, -{HEX_DBL( +, 1, fe6150ee3e6d4, -, 1 ), HEX_DBL( +, 1, 2b9af9a28e282, -, 8 ), HEX_DBL( +, 1, 0fd3674e1dc5b, -, 61 )}, -{HEX_DBL( +, 1, fe21c05baa109, -, 1 ), HEX_DBL( +, 1, 599d4678f24b9, -, 8 ), HEX_DBL( +, 1, dafce1f09937b, -, 61 )}, -{HEX_DBL( +, 1, fde23f9c69cf9, -, 1 ), HEX_DBL( +, 1, 8799d8c046eb, -, 8 ), HEX_DBL( +, 1, ffa0ce0bdd217, -, 65 )}, -{HEX_DBL( +, 1, fda2ceaa956e8, -, 1 ), HEX_DBL( +, 1, b590b1e5951ee, -, 8 ), HEX_DBL( +, 1, 645a769232446, -, 62 )}, -{HEX_DBL( +, 1, fd636d8047a1f, 
-, 1 ), HEX_DBL( +, 1, e381d3555dbcf, -, 8 ), HEX_DBL( +, 1, 882320d368331, -, 61 )}, -{HEX_DBL( +, 1, fd241c179e0cc, -, 1 ), HEX_DBL( +, 1, 08b69f3dccde, -, 7 ), HEX_DBL( +, 1, 01ad5065aba9e, -, 61 )}, -{HEX_DBL( +, 1, fce4da6ab93e8, -, 1 ), HEX_DBL( +, 1, 1fa97a61dd298, -, 7 ), HEX_DBL( +, 1, 84cd1f931ae34, -, 60 )}, -{HEX_DBL( +, 1, fca5a873bcb19, -, 1 ), HEX_DBL( +, 1, 36997bcc54a3f, -, 7 ), HEX_DBL( +, 1, 1485e97eaee03, -, 60 )}, -{HEX_DBL( +, 1, fc66862ccec93, -, 1 ), HEX_DBL( +, 1, 4d86a43264a4f, -, 7 ), HEX_DBL( +, 1, c75e63370988b, -, 61 )}, -{HEX_DBL( +, 1, fc27739018cfe, -, 1 ), HEX_DBL( +, 1, 6470f448fb09d, -, 7 ), HEX_DBL( +, 1, d7361eeaed0a1, -, 65 )}, -{HEX_DBL( +, 1, fbe87097c6f5a, -, 1 ), HEX_DBL( +, 1, 7b586cc4c2523, -, 7 ), HEX_DBL( +, 1, b3df952cc473c, -, 61 )}, -{HEX_DBL( +, 1, fba97d3e084dd, -, 1 ), HEX_DBL( +, 1, 923d0e5a21e06, -, 7 ), HEX_DBL( +, 1, cf56c7b64ae5d, -, 62 )}, -{HEX_DBL( +, 1, fb6a997d0ecdc, -, 1 ), HEX_DBL( +, 1, a91ed9bd3df9a, -, 7 ), HEX_DBL( +, 1, b957bdcd89e43, -, 61 )}, -{HEX_DBL( +, 1, fb2bc54f0f4ab, -, 1 ), HEX_DBL( +, 1, bffdcfa1f7fbb, -, 7 ), HEX_DBL( +, 1, ea8cad9a21771, -, 62 )}, -{HEX_DBL( +, 1, faed00ae41783, -, 1 ), HEX_DBL( +, 1, d6d9f0bbee6f6, -, 7 ), HEX_DBL( +, 1, 5762a9af89c82, -, 60 )}, -{HEX_DBL( +, 1, faae4b94dfe64, -, 1 ), HEX_DBL( +, 1, edb33dbe7d335, -, 7 ), HEX_DBL( +, 1, 21e24fc245697, -, 62 )}, -{HEX_DBL( +, 1, fa6fa5fd27ff8, -, 1 ), HEX_DBL( +, 1, 0244dbae5ed05, -, 6 ), HEX_DBL( +, 1, 12ef51b967102, -, 60 )}, -{HEX_DBL( +, 1, fa310fe15a078, -, 1 ), HEX_DBL( +, 1, 0daeaf24c3529, -, 6 ), HEX_DBL( +, 1, 10d3cfca60b45, -, 59 )}, -{HEX_DBL( +, 1, f9f2893bb9192, -, 1 ), HEX_DBL( +, 1, 1917199bb66bc, -, 6 ), HEX_DBL( +, 1, 6cf6034c32e19, -, 60 )}, -{HEX_DBL( +, 1, f9b412068b247, -, 1 ), HEX_DBL( +, 1, 247e1b6c615d5, -, 6 ), HEX_DBL( +, 1, 42f0fffa229f7, -, 61 )}, -{HEX_DBL( +, 1, f975aa3c18ed6, -, 1 ), HEX_DBL( +, 1, 2fe3b4efcc5ad, -, 6 ), HEX_DBL( +, 1, 70106136a8919, -, 60 )}, -{HEX_DBL( +, 1, 
f93751d6ae09b, -, 1 ), HEX_DBL( +, 1, 3b47e67edea93, -, 6 ), HEX_DBL( +, 1, 38dd5a4f6959a, -, 59 )}, -{HEX_DBL( +, 1, f8f908d098df6, -, 1 ), HEX_DBL( +, 1, 46aab0725ea6c, -, 6 ), HEX_DBL( +, 1, 821fc1e799e01, -, 60 )}, -{HEX_DBL( +, 1, f8bacf242aa2c, -, 1 ), HEX_DBL( +, 1, 520c1322f1e4e, -, 6 ), HEX_DBL( +, 1, 129dcda3ad563, -, 60 )}, -{HEX_DBL( +, 1, f87ca4cbb755, -, 1 ), HEX_DBL( +, 1, 5d6c0ee91d2ab, -, 6 ), HEX_DBL( +, 1, c5b190c04606e, -, 62 )}, -{HEX_DBL( +, 1, f83e89c195c25, -, 1 ), HEX_DBL( +, 1, 68caa41d448c3, -, 6 ), HEX_DBL( +, 1, 4723441195ac9, -, 59 )} + { HEX_DBL(+, 1, 01fbe7f0a1be6, +, 0), HEX_DBL(-, 1, 6cf6ddd26112a, -, 7), + HEX_DBL(+, 1, 0725e5755e314, -, 60) }, + { HEX_DBL(+, 1, 01eba93a97b12, +, 0), HEX_DBL(-, 1, 6155b1d99f603, -, 7), + HEX_DBL(+, 1, 4bcea073117f4, -, 60) }, + { HEX_DBL(+, 1, 01db6c9029cd1, +, 0), HEX_DBL(-, 1, 55b54153137ff, -, 7), + HEX_DBL(+, 1, 21e8faccad0ec, -, 61) }, + { HEX_DBL(+, 1, 01cb31f0f534c, +, 0), HEX_DBL(-, 1, 4a158c27245bd, -, 7), + HEX_DBL(+, 1, 1a5b7bfbf35d3, -, 60) }, + { HEX_DBL(+, 1, 01baf95c9723c, +, 0), HEX_DBL(-, 1, 3e76923e3d678, -, 7), + HEX_DBL(+, 1, eee400eb5fe34, -, 62) }, + { HEX_DBL(+, 1, 01aac2d2acee6, +, 0), HEX_DBL(-, 1, 32d85380ce776, -, 7), + HEX_DBL(+, 1, cbf7a513937bd, -, 61) }, + { HEX_DBL(+, 1, 019a8e52d401e, +, 0), HEX_DBL(-, 1, 273acfd74be72, -, 7), + HEX_DBL(+, 1, 5c64599efa5e6, -, 60) }, + { HEX_DBL(+, 1, 018a5bdca9e42, +, 0), HEX_DBL(-, 1, 1b9e072a2e65, -, 7), + HEX_DBL(+, 1, 364180e0a5d37, -, 60) }, + { HEX_DBL(+, 1, 017a2b6fcc33e, +, 0), HEX_DBL(-, 1, 1001f961f3243, -, 7), + HEX_DBL(+, 1, 63d795746f216, -, 60) }, + { HEX_DBL(+, 1, 0169fd0bd8a8a, +, 0), HEX_DBL(-, 1, 0466a6671bca4, -, 7), + HEX_DBL(+, 1, 4c99ff1907435, -, 60) }, + { HEX_DBL(+, 1, 0159d0b06d129, +, 0), HEX_DBL(-, 1, f1981c445cd05, -, 8), + HEX_DBL(+, 1, 4bfff6366b723, -, 62) }, + { HEX_DBL(+, 1, 0149a65d275a6, +, 0), HEX_DBL(-, 1, da6460f76ab8c, -, 8), + HEX_DBL(+, 1, 9c5404f47589c, -, 61) }, + { HEX_DBL(+, 1, 
01397e11a581b, +, 0), HEX_DBL(-, 1, c3321ab87f4ef, -, 8), + HEX_DBL(+, 1, c0da537429cea, -, 61) }, + { HEX_DBL(+, 1, 012957cd85a28, +, 0), HEX_DBL(-, 1, ac014958c112c, -, 8), + HEX_DBL(+, 1, 000c2a1b595e3, -, 64) }, + { HEX_DBL(+, 1, 0119339065ef7, +, 0), HEX_DBL(-, 1, 94d1eca95f67a, -, 8), + HEX_DBL(+, 1, d8d20b0564d5, -, 61) }, + { HEX_DBL(+, 1, 01091159e4b3d, +, 0), HEX_DBL(-, 1, 7da4047b92b3e, -, 8), + HEX_DBL(+, 1, 6194a5d68cf2, -, 66) }, + { HEX_DBL(+, 1, 00f8f129a0535, +, 0), HEX_DBL(-, 1, 667790a09bf77, -, 8), + HEX_DBL(+, 1, ca230e0bea645, -, 61) }, + { HEX_DBL(+, 1, 00e8d2ff374a1, +, 0), HEX_DBL(-, 1, 4f4c90e9c4ead, -, 8), + HEX_DBL(+, 1, 1de3e7f350c1, -, 61) }, + { HEX_DBL(+, 1, 00d8b6da482ce, +, 0), HEX_DBL(-, 1, 3823052860649, -, 8), + HEX_DBL(+, 1, 5789b4c5891b8, -, 64) }, + { HEX_DBL(+, 1, 00c89cba71a8c, +, 0), HEX_DBL(-, 1, 20faed2dc9a9e, -, 8), + HEX_DBL(+, 1, 9e7c40f9839fd, -, 62) }, + { HEX_DBL(+, 1, 00b8849f52834, +, 0), HEX_DBL(-, 1, 09d448cb65014, -, 8), + HEX_DBL(+, 1, 387e3e9b6d02, -, 62) }, + { HEX_DBL(+, 1, 00a86e88899a4, +, 0), HEX_DBL(-, 1, e55e2fa53ebf1, -, 9), + HEX_DBL(+, 1, cdaa71fddfddf, -, 62) }, + { HEX_DBL(+, 1, 00985a75b5e3f, +, 0), HEX_DBL(-, 1, b716b429dce0f, -, 9), + HEX_DBL(+, 1, 2f2af081367bf, -, 63) }, + { HEX_DBL(+, 1, 00884866766ee, +, 0), HEX_DBL(-, 1, 88d21ec7a16d7, -, 9), + HEX_DBL(+, 1, fb95c228d6f16, -, 62) }, + { HEX_DBL(+, 1, 0078385a6a61d, +, 0), HEX_DBL(-, 1, 5a906f219a9e8, -, 9), + HEX_DBL(+, 1, 18aff10a89f29, -, 64) }, + { HEX_DBL(+, 1, 00682a5130fbe, +, 0), HEX_DBL(-, 1, 2c51a4dae87f1, -, 9), + HEX_DBL(+, 1, bcc7e33ddde3, -, 63) }, + { HEX_DBL(+, 1, 00581e4a69944, +, 0), HEX_DBL(-, 1, fc2b7f2d782b1, -, 10), + HEX_DBL(+, 1, fe3ef3300a9fa, -, 64) }, + { HEX_DBL(+, 1, 00481445b39a8, +, 0), HEX_DBL(-, 1, 9fb97df0b0b83, -, 10), + HEX_DBL(+, 1, 0d9a601f2f324, -, 65) }, + { HEX_DBL(+, 1, 00380c42ae963, +, 0), HEX_DBL(-, 1, 434d4546227ae, -, 10), + HEX_DBL(+, 1, 0b9b6a5868f33, -, 63) }, + { HEX_DBL(+, 1, 
00280640fa271, +, 0), HEX_DBL(-, 1, cdcda8e930c19, -, 11), + HEX_DBL(+, 1, 3d424ab39f789, -, 64) }, + { HEX_DBL(+, 1, 0018024036051, +, 0), HEX_DBL(-, 1, 150c558601261, -, 11), + HEX_DBL(+, 1, 285bb90327a0f, -, 64) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, ffa011fca0a1e, -, 1), HEX_DBL(+, 1, 14e5640c4197b, -, 10), + HEX_DBL(+, 1, 95728136ae401, -, 63) }, + { HEX_DBL(+, 1, ff6031f064e07, -, 1), HEX_DBL(+, 1, cd61806bf532d, -, 10), + HEX_DBL(+, 1, 568a4f35d8538, -, 63) }, + { HEX_DBL(+, 1, ff2061d532b9c, -, 1), HEX_DBL(+, 1, 42e34af550eda, -, 9), + HEX_DBL(+, 1, 8f69cee55fec, -, 62) }, + { HEX_DBL(+, 1, fee0a1a513253, -, 1), HEX_DBL(+, 1, 9f0a5523902ea, -, 9), + HEX_DBL(+, 1, daec734b11615, -, 63) }, + { HEX_DBL(+, 1, fea0f15a12139, -, 1), HEX_DBL(+, 1, fb25e19f11b26, -, 9), + HEX_DBL(+, 1, 8bafca62941da, -, 62) }, + { HEX_DBL(+, 1, fe6150ee3e6d4, -, 1), HEX_DBL(+, 1, 2b9af9a28e282, -, 8), + HEX_DBL(+, 1, 0fd3674e1dc5b, -, 61) }, + { HEX_DBL(+, 1, fe21c05baa109, -, 1), HEX_DBL(+, 1, 599d4678f24b9, -, 8), + HEX_DBL(+, 1, dafce1f09937b, -, 61) }, + { HEX_DBL(+, 1, fde23f9c69cf9, -, 1), HEX_DBL(+, 1, 8799d8c046eb, -, 8), + HEX_DBL(+, 1, ffa0ce0bdd217, -, 65) }, + { HEX_DBL(+, 1, fda2ceaa956e8, -, 1), HEX_DBL(+, 1, b590b1e5951ee, -, 8), + HEX_DBL(+, 1, 645a769232446, -, 62) }, + { HEX_DBL(+, 1, fd636d8047a1f, -, 1), HEX_DBL(+, 1, e381d3555dbcf, -, 8), + HEX_DBL(+, 1, 882320d368331, -, 61) }, + { HEX_DBL(+, 1, fd241c179e0cc, -, 1), HEX_DBL(+, 1, 08b69f3dccde, -, 7), + HEX_DBL(+, 1, 01ad5065aba9e, -, 61) }, + { HEX_DBL(+, 1, fce4da6ab93e8, -, 1), HEX_DBL(+, 1, 1fa97a61dd298, -, 7), + HEX_DBL(+, 1, 84cd1f931ae34, -, 60) }, + { HEX_DBL(+, 1, fca5a873bcb19, -, 1), HEX_DBL(+, 1, 36997bcc54a3f, -, 7), + HEX_DBL(+, 1, 1485e97eaee03, -, 60) }, + { HEX_DBL(+, 1, fc66862ccec93, -, 1), HEX_DBL(+, 1, 4d86a43264a4f, -, 7), + HEX_DBL(+, 1, 
c75e63370988b, -, 61) }, + { HEX_DBL(+, 1, fc27739018cfe, -, 1), HEX_DBL(+, 1, 6470f448fb09d, -, 7), + HEX_DBL(+, 1, d7361eeaed0a1, -, 65) }, + { HEX_DBL(+, 1, fbe87097c6f5a, -, 1), HEX_DBL(+, 1, 7b586cc4c2523, -, 7), + HEX_DBL(+, 1, b3df952cc473c, -, 61) }, + { HEX_DBL(+, 1, fba97d3e084dd, -, 1), HEX_DBL(+, 1, 923d0e5a21e06, -, 7), + HEX_DBL(+, 1, cf56c7b64ae5d, -, 62) }, + { HEX_DBL(+, 1, fb6a997d0ecdc, -, 1), HEX_DBL(+, 1, a91ed9bd3df9a, -, 7), + HEX_DBL(+, 1, b957bdcd89e43, -, 61) }, + { HEX_DBL(+, 1, fb2bc54f0f4ab, -, 1), HEX_DBL(+, 1, bffdcfa1f7fbb, -, 7), + HEX_DBL(+, 1, ea8cad9a21771, -, 62) }, + { HEX_DBL(+, 1, faed00ae41783, -, 1), HEX_DBL(+, 1, d6d9f0bbee6f6, -, 7), + HEX_DBL(+, 1, 5762a9af89c82, -, 60) }, + { HEX_DBL(+, 1, faae4b94dfe64, -, 1), HEX_DBL(+, 1, edb33dbe7d335, -, 7), + HEX_DBL(+, 1, 21e24fc245697, -, 62) }, + { HEX_DBL(+, 1, fa6fa5fd27ff8, -, 1), HEX_DBL(+, 1, 0244dbae5ed05, -, 6), + HEX_DBL(+, 1, 12ef51b967102, -, 60) }, + { HEX_DBL(+, 1, fa310fe15a078, -, 1), HEX_DBL(+, 1, 0daeaf24c3529, -, 6), + HEX_DBL(+, 1, 10d3cfca60b45, -, 59) }, + { HEX_DBL(+, 1, f9f2893bb9192, -, 1), HEX_DBL(+, 1, 1917199bb66bc, -, 6), + HEX_DBL(+, 1, 6cf6034c32e19, -, 60) }, + { HEX_DBL(+, 1, f9b412068b247, -, 1), HEX_DBL(+, 1, 247e1b6c615d5, -, 6), + HEX_DBL(+, 1, 42f0fffa229f7, -, 61) }, + { HEX_DBL(+, 1, f975aa3c18ed6, -, 1), HEX_DBL(+, 1, 2fe3b4efcc5ad, -, 6), + HEX_DBL(+, 1, 70106136a8919, -, 60) }, + { HEX_DBL(+, 1, f93751d6ae09b, -, 1), HEX_DBL(+, 1, 3b47e67edea93, -, 6), + HEX_DBL(+, 1, 38dd5a4f6959a, -, 59) }, + { HEX_DBL(+, 1, f8f908d098df6, -, 1), HEX_DBL(+, 1, 46aab0725ea6c, -, 6), + HEX_DBL(+, 1, 821fc1e799e01, -, 60) }, + { HEX_DBL(+, 1, f8bacf242aa2c, -, 1), HEX_DBL(+, 1, 520c1322f1e4e, -, 6), + HEX_DBL(+, 1, 129dcda3ad563, -, 60) }, + { HEX_DBL(+, 1, f87ca4cbb755, -, 1), HEX_DBL(+, 1, 5d6c0ee91d2ab, -, 6), + HEX_DBL(+, 1, c5b190c04606e, -, 62) }, + { HEX_DBL(+, 1, f83e89c195c25, -, 1), HEX_DBL(+, 1, 68caa41d448c3, -, 6), + HEX_DBL(+, 1, 
4723441195ac9, -, 59) } }; static double __loglTable3[8][3] = { -{HEX_DBL( +, 1, 000e00c40ab89, +, 0 ), HEX_DBL( -, 1, 4332be0032168, -, 12 ), HEX_DBL( +, 1, a1003588d217a, -, 65 )}, -{HEX_DBL( +, 1, 000a006403e82, +, 0 ), HEX_DBL( -, 1, cdb2987366fcc, -, 13 ), HEX_DBL( +, 1, 5c86001294bbc, -, 67 )}, -{HEX_DBL( +, 1, 0006002400d8, +, 0 ), HEX_DBL( -, 1, 150297c90fa6f, -, 13 ), HEX_DBL( +, 1, 01fb4865fae32, -, 66 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, -{HEX_DBL( +, 1, ffe8011ff280a, -, 1 ), HEX_DBL( +, 1, 14f8daf5e3d3b, -, 12 ), HEX_DBL( +, 1, 3c933b4b6b914, -, 68 )}, -{HEX_DBL( +, 1, ffd8031fc184e, -, 1 ), HEX_DBL( +, 1, cd978c38042bb, -, 12 ), HEX_DBL( +, 1, 10f8e642e66fd, -, 65 )}, -{HEX_DBL( +, 1, ffc8061f5492b, -, 1 ), HEX_DBL( +, 1, 43183c878274e, -, 11 ), HEX_DBL( +, 1, 5885dd1eb6582, -, 65 )} + { HEX_DBL(+, 1, 000e00c40ab89, +, 0), HEX_DBL(-, 1, 4332be0032168, -, 12), + HEX_DBL(+, 1, a1003588d217a, -, 65) }, + { HEX_DBL(+, 1, 000a006403e82, +, 0), HEX_DBL(-, 1, cdb2987366fcc, -, 13), + HEX_DBL(+, 1, 5c86001294bbc, -, 67) }, + { HEX_DBL(+, 1, 0006002400d8, +, 0), HEX_DBL(-, 1, 150297c90fa6f, -, 13), + HEX_DBL(+, 1, 01fb4865fae32, -, 66) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) }, + { HEX_DBL(+, 1, ffe8011ff280a, -, 1), HEX_DBL(+, 1, 14f8daf5e3d3b, -, 12), + HEX_DBL(+, 1, 3c933b4b6b914, -, 68) }, + { HEX_DBL(+, 1, ffd8031fc184e, -, 1), HEX_DBL(+, 1, cd978c38042bb, -, 12), + HEX_DBL(+, 1, 10f8e642e66fd, -, 65) }, + { HEX_DBL(+, 1, ffc8061f5492b, -, 1), HEX_DBL(+, 1, 43183c878274e, -, 11), + HEX_DBL(+, 1, 5885dd1eb6582, -, 65) } }; static void __log2_ep(double *hi, double *lo, double x) { - union { uint64_t i; double d; } uu; + union { + uint64_t i; + double d; + } uu; int m; double f = reference_frexp(x, &m); // bring f in 
[0.75, 1.5) - if( f < 0.75 ) { + if (f < 0.75) + { f *= 2.0; m -= 1; } // index first table .... brings down to [1-2^-7, 1+2^6) uu.d = f; - int index = (int) (((uu.i + ((uint64_t) 1 << 51)) & 0x000fc00000000000ULL) >> 46); + int index = + (int)(((uu.i + ((uint64_t)1 << 51)) & 0x000fc00000000000ULL) >> 46); double r1 = __loglTable1[index][0]; double logr1hi = __loglTable1[index][1]; double logr1lo = __loglTable1[index][2]; - // since log1rhi has 39 bits of precision, we have 14 bit in hand ... since |m| <= 1023 - // which needs 10bits at max, we can directly add m to log1hi without spilling + // since log1rhi has 39 bits of precision, we have 14 bit in hand ... since + // |m| <= 1023 which needs 10bits at max, we can directly add m to log1hi + // without spilling logr1hi += m; - // argument reduction needs to be in double-double since reduced argument will form the - // leading term of polynomial approximation which sets the precision we eventually achieve + // argument reduction needs to be in double-double since reduced argument + // will form the leading term of polynomial approximation which sets the + // precision we eventually achieve double zhi, zlo; MulD(&zhi, &zlo, r1, uu.d); // second index table .... brings down to [1-2^-12, 1+2^-11) uu.d = zhi; - index = (int) (((uu.i + ((uint64_t) 1 << 46)) & 0x00007e0000000000ULL) >> 41); + index = (int)(((uu.i + ((uint64_t)1 << 46)) & 0x00007e0000000000ULL) >> 41); double r2 = __loglTable2[index][0]; double logr2hi = __loglTable2[index][1]; double logr2lo = __loglTable2[index][2]; @@ -4186,11 +4466,12 @@ static void __log2_ep(double *hi, double *lo, double x) // Actually reduction to 2^-11 would have been sufficient to calculate // second order term in polynomial in double rather than double-double, I // reduced it a bit more to make sure other systematic arithmetic errors - // are guarded against .... also this allow lower order product of leading polynomial - // term i.e. 
Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather than double-double ... - // hence only term that needs to be done in double-double is Ao_hi*z_hi + // are guarded against .... also this allow lower order product of leading + // polynomial term i.e. Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather + // than double-double ... hence only term that needs to be done in + // double-double is Ao_hi*z_hi uu.d = zhi; - index = (int) (((uu.i + ((uint64_t) 1 << 41)) & 0x0000038000000000ULL) >> 39); + index = (int)(((uu.i + ((uint64_t)1 << 41)) & 0x0000038000000000ULL) >> 39); double r3 = __loglTable3[index][0]; double logr3hi = __loglTable3[index][1]; double logr3lo = __loglTable3[index][2]; @@ -4202,34 +4483,36 @@ static void __log2_ep(double *hi, double *lo, double x) AddDD(&log2hi, &log2lo, logr1hi, logr1lo, logr2hi, logr2lo); AddDD(&log2hi, &log2lo, logr3hi, logr3lo, log2hi, log2lo); - // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after this + // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after + // this MulDD(&zhi, &zlo, zhi, zlo, r3, 0.0); - // we dont need to do full double-double substract here. substracting 1.0 for higher - // term is exact + // we dont need to do full double-double substract here. substracting 1.0 + // for higher term is exact zhi = zhi - 1.0; // normalize AddD(&zhi, &zlo, zhi, zlo); // polynomail fitting to compute log2(1 + z) ... 
forth order polynomial fit - // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in [-2^-14, 2^-13] - // log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4 + // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in + // [-2^-14, 2^-13] log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4 // => log2(1 + z) = Ao*z + A1*z^2 + A2*z^3 + A3*z^4 + A4*z^5 - // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 + A4*z^3) - // since we are looking for at least 64 digits of precision and z in [-2^-14, 2^-13], final term - // can be done in double .... also Aolo*zhi + Aohi*zlo can be done in double .... - // Aohi*zhi needs to be done in double-double - - double Aohi = HEX_DBL( +, 1, 71547652b82fe, +, 0 ); - double Aolo = HEX_DBL( +, 1, 777c9cbb675c, -, 56 ); + // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 + + // A4*z^3) since we are looking for at least 64 digits of precision and z in + // [-2^-14, 2^-13], final term can be done in double .... also Aolo*zhi + + // Aohi*zlo can be done in double .... 
Aohi*zhi needs to be done in + // double-double + + double Aohi = HEX_DBL(+, 1, 71547652b82fe, +, 0); + double Aolo = HEX_DBL(+, 1, 777c9cbb675c, -, 56); double y; - y = HEX_DBL( +, 1, 276d2736fade7, -, 2 ); - y = HEX_DBL( -, 1, 7154765782df1, -, 2 ) + y*zhi; - y = HEX_DBL( +, 1, ec709dc3a0f67, -, 2 ) + y*zhi; - y = HEX_DBL( -, 1, 71547652b82fe, -, 1 ) + y*zhi; - double zhisq = zhi*zhi; - y = y*zhisq; - y = y + zhi*Aolo; - y = y + zlo*Aohi; + y = HEX_DBL(+, 1, 276d2736fade7, -, 2); + y = HEX_DBL(-, 1, 7154765782df1, -, 2) + y * zhi; + y = HEX_DBL(+, 1, ec709dc3a0f67, -, 2) + y * zhi; + y = HEX_DBL(-, 1, 71547652b82fe, -, 1) + y * zhi; + double zhisq = zhi * zhi; + y = y * zhisq; + y = y + zhi * Aolo; + y = y + zlo * Aohi; MulD(&zhi, &zlo, Aohi, zhi); AddDD(&zhi, &zlo, zhi, zlo, y, 0.0); @@ -4239,10 +4522,8 @@ static void __log2_ep(double *hi, double *lo, double x) *lo = zlo; } -long double reference_powl( long double x, long double y ) +long double reference_powl(long double x, long double y) { - - // this will be used for testing doubles i.e. arguments will // be doubles so cast the input back to double ... returned // result will be long double though .... > 53 bits of precision @@ -4256,174 +4537,163 @@ long double reference_powl( long double x, long double y ) // causes errors. So we need to tread y as long double and convert it // to hi, lo doubles when performing y*log2(x). 
-// double x = (double) xx; -// double y = (double) yy; - - static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 ); + static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53); - //if x = 1, return x for any y, even NaN - if( x == 1.0 ) - return x; + // if x = 1, return x for any y, even NaN + if (x == 1.0) return x; - //if y == 0, return 1 for any x, even NaN - if( y == 0.0 ) - return 1.0L; + // if y == 0, return 1 for any x, even NaN + if (y == 0.0) return 1.0L; - //get NaNs out of the way - if( x != x || y != y ) - return x + y; + // get NaNs out of the way + if (x != x || y != y) return x + y; - //do the work required to sort out edge cases - double fabsy = reference_fabs( y ); - double fabsx = reference_fabs( x ); - double iy = reference_rint( fabsy ); //we do round to nearest here so that |fy| <= 0.5 - if( iy > fabsy )//convert nearbyint to floor + // do the work required to sort out edge cases + double fabsy = reference_fabs(y); + double fabsx = reference_fabs(x); + double iy = reference_rint( + fabsy); // we do round to nearest here so that |fy| <= 0.5 + if (iy > fabsy) // convert nearbyint to floor iy -= 1.0; int isOddInt = 0; - if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon ) - isOddInt = (int) (iy - 2.0 * rint( 0.5 * iy )); //might be 0, -1, or 1 + if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon) + isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1 - ///test a few more edge cases - //deal with x == 0 cases - if( x == 0.0 ) + /// test a few more edge cases + // deal with x == 0 cases + if (x == 0.0) { - if( ! 
isOddInt ) - x = 0.0; + if (!isOddInt) x = 0.0; - if( y < 0 ) - x = 1.0/ x; + if (y < 0) x = 1.0 / x; return x; } - //x == +-Inf cases - if( isinf(fabsx) ) + // x == +-Inf cases + if (isinf(fabsx)) { - if( x < 0 ) + if (x < 0) { - if( isOddInt ) + if (isOddInt) { - if( y < 0 ) + if (y < 0) return -0.0; else return -INFINITY; } else { - if( y < 0 ) + if (y < 0) return 0.0; else return INFINITY; } } - if( y < 0 ) - return 0; + if (y < 0) return 0; return INFINITY; } - //y = +-inf cases - if( isinf(fabsy) ) + // y = +-inf cases + if (isinf(fabsy)) { - if( x == -1 ) - return 1; + if (x == -1) return 1; - if( y < 0 ) + if (y < 0) { - if( fabsx < 1 ) - return INFINITY; + if (fabsx < 1) return INFINITY; return 0; } - if( fabsx < 1 ) - return 0; + if (fabsx < 1) return 0; return INFINITY; } // x < 0 and y non integer case - if( x < 0 && iy != fabsy ) + if (x < 0 && iy != fabsy) { - //return nan; + // return nan; return cl_make_nan(); } - //speedy resolution of sqrt and reciprocal sqrt - if( fabsy == 0.5 ) + // speedy resolution of sqrt and reciprocal sqrt + if (fabsy == 0.5) { - long double xl = sqrtl( x ); - if( y < 0 ) - xl = 1.0/ xl; + long double xl = sqrtl(x); + if (y < 0) xl = 1.0 / xl; return xl; } double log2x_hi, log2x_lo; - // extended precision log .... accurate to at least 64-bits + couple of guard bits + // extended precision log .... 
accurate to at least 64-bits + couple of + // guard bits __log2_ep(&log2x_hi, &log2x_lo, fabsx); double ylog2x_hi, ylog2x_lo; - double y_hi = (double) y; - double y_lo = (double) ( y - (long double) y_hi); + double y_hi = (double)y; + double y_lo = (double)(y - (long double)y_hi); // compute product of y*log2(x) // scale to avoid overflow in double-double multiplication - if( reference_fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) { + if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + { y_hi = reference_ldexp(y_hi, -53); y_lo = reference_ldexp(y_lo, -53); } MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo); - if( fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) { + if (fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + { ylog2x_hi = reference_ldexp(ylog2x_hi, 53); ylog2x_lo = reference_ldexp(ylog2x_lo, 53); } long double powxy; - if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) { - powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY; - } else { + if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) + { + powxy = + reference_signbit(ylog2x_hi) ? 
HEX_DBL(+, 0, 0, +, 0) : INFINITY; + } + else + { // separate integer + fractional part long int m = lrint(ylog2x_hi); AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0); // revert to long double arithemtic - long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo; - long double tmp = reference_exp2l( ylog2x ); + long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo; + long double tmp = reference_exp2l(ylog2x); powxy = reference_scalblnl(tmp, m); } // if y is odd integer and x is negative, reverse sign - if( isOddInt & reference_signbit(x)) - powxy = -powxy; + if (isOddInt & reference_signbit(x)) powxy = -powxy; return powxy; } double reference_nextafter(double xx, double yy) { - float x = (float) xx; - float y = (float) yy; + float x = (float)xx; + float y = (float)yy; // take care of nans - if( x != x ) - return x; + if (x != x) return x; - if( y != y ) - return y; + if (y != y) return y; - if( x == y ) - return y; + if (x == y) return y; int32f_t a, b; - a.f = x; - b.f = y; + a.f = x; + b.f = y; - if( a.i & 0x80000000 ) - a.i = 0x80000000 - a.i; - if(b.i & 0x80000000 ) - b.i = 0x80000000 - b.i; + if (a.i & 0x80000000) a.i = 0x80000000 - a.i; + if (b.i & 0x80000000) b.i = 0x80000000 - b.i; a.i += (a.i < b.i) ? 1 : -1; - a.i = (a.i < 0) ? (cl_int) 0x80000000 - a.i : a.i; + a.i = (a.i < 0) ? 
(cl_int)0x80000000 - a.i : a.i; return a.f; } @@ -4431,33 +4701,28 @@ double reference_nextafter(double xx, double yy) long double reference_nextafterl(long double xx, long double yy) { - double x = (double) xx; - double y = (double) yy; + double x = (double)xx; + double y = (double)yy; // take care of nans - if( x != x ) - return x; + if (x != x) return x; - if( y != y ) - return y; + if (y != y) return y; int64d_t a, b; - a.d = x; - b.d = y; + a.d = x; + b.d = y; int64_t tmp = 0x8000000000000000LL; - if( a.l & tmp ) - a.l = tmp - a.l; - if(b.l & tmp ) - b.l = tmp - b.l; + if (a.l & tmp) a.l = tmp - a.l; + if (b.l & tmp) b.l = tmp - b.l; - // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y = 0.0f) - // test needs to be done using integer rep because - // subnormals may be flushed to zero on some platforms - if( a.l == b.l ) - return y; + // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y = + // 0.0f) test needs to be done using integer rep because subnormals may be + // flushed to zero on some platforms + if (a.l == b.l) return y; a.l += (a.l < b.l) ? 1 : -1; a.l = (a.l < 0) ? tmp - a.l : a.l; @@ -4467,112 +4732,108 @@ long double reference_nextafterl(long double xx, long double yy) double reference_fdim(double xx, double yy) { - float x = (float) xx; - float y = (float) yy; + float x = (float)xx; + float y = (float)yy; - if( x != x ) - return x; + if (x != x) return x; - if( y != y ) - return y; + if (y != y) return y; - float r = ( x > y ) ? (float) reference_subtract( x, y) : 0.0f; + float r = (x > y) ? (float)reference_subtract(x, y) : 0.0f; return r; - } long double reference_fdiml(long double xx, long double yy) { - double x = (double) xx; - double y = (double) yy; + double x = (double)xx; + double y = (double)yy; - if( x != x ) - return x; + if (x != x) return x; - if( y != y ) - return y; + if (y != y) return y; - double r = ( x > y ) ? (double) reference_subtractl(x, y) : 0.0; + double r = (x > y) ? 
(double)reference_subtractl(x, y) : 0.0; return r; } double reference_remquo(double xd, double yd, int *n) { - float xx = (float) xd; - float yy = (float) yd; + float xx = (float)xd; + float yy = (float)yd; - if( isnan(xx) || isnan(yy) || - fabsf(xx) == INFINITY || - yy == 0.0 ) + if (isnan(xx) || isnan(yy) || fabsf(xx) == INFINITY || yy == 0.0) { *n = 0; return cl_make_nan(); } - if( fabsf(yy) == INFINITY || xx == 0.0f ) { + if (fabsf(yy) == INFINITY || xx == 0.0f) + { *n = 0; return xd; } - if( fabsf(xx) == fabsf(yy) ) { + if (fabsf(xx) == fabsf(yy)) + { *n = (xx == yy) ? 1 : -1; - return reference_signbit( xx ) ? -0.0 : 0.0; + return reference_signbit(xx) ? -0.0 : 0.0; } - int signx = reference_signbit( xx ) ? -1 : 1; - int signy = reference_signbit( yy ) ? -1 : 1; + int signx = reference_signbit(xx) ? -1 : 1; + int signy = reference_signbit(yy) ? -1 : 1; int signn = (signx == signy) ? 1 : -1; float x = fabsf(xx); float y = fabsf(yy); int ex, ey; - ex = reference_ilogb( x ); - ey = reference_ilogb( y ); + ex = reference_ilogb(x); + ey = reference_ilogb(y); float xr = x; float yr = y; uint32_t q = 0; - if(ex-ey >= -1) { - - yr = (float) reference_ldexp( y, -ey ); - xr = (float) reference_ldexp( x, -ex ); - - if(ex-ey >= 0) { + if (ex - ey >= -1) + { + yr = (float)reference_ldexp(y, -ey); + xr = (float)reference_ldexp(x, -ex); + if (ex - ey >= 0) + { int i; - for(i = ex-ey; i > 0; i--) { + for (i = ex - ey; i > 0; i--) + { q <<= 1; - if(xr >= yr) { + if (xr >= yr) + { xr -= yr; q += 1; } xr += xr; } q <<= 1; - if( xr > yr ) { + if (xr > yr) + { xr -= yr; q += 1; } } - else //ex-ey = -1 - xr = reference_ldexp(xr, ex-ey); + else // ex-ey = -1 + xr = reference_ldexp(xr, ex - ey); } - if( (yr < 2.0f*xr) || ( (yr == 2.0f*xr) && (q & 0x00000001) ) ) { + if ((yr < 2.0f * xr) || ((yr == 2.0f * xr) && (q & 0x00000001))) + { xr -= yr; q += 1; } - if(ex-ey >= -1) - xr = reference_ldexp(xr, ey); + if (ex - ey >= -1) xr = reference_ldexp(xr, ey); int qout = q & 0x0000007f; - 
if( signn < 0) - qout = -qout; - if( xx < 0.0 ) - xr = -xr; + if (signn < 0) qout = -qout; + if (xx < 0.0) xr = -xr; *n = qout; @@ -4581,80 +4842,80 @@ double reference_remquo(double xd, double yd, int *n) long double reference_remquol(long double xd, long double yd, int *n) { + double xx = (double)xd; + double yy = (double)yd; - double xx = (double) xd; - double yy = (double) yd; - - if( isnan(xx) || isnan(yy) || - fabs(xx) == INFINITY || - yy == 0.0 ) + if (isnan(xx) || isnan(yy) || fabs(xx) == INFINITY || yy == 0.0) { *n = 0; return cl_make_nan(); } - if( reference_fabs(yy) == INFINITY || xx == 0.0 ) { + if (reference_fabs(yy) == INFINITY || xx == 0.0) + { *n = 0; return xd; } - if( reference_fabs(xx) == reference_fabs(yy) ) { + if (reference_fabs(xx) == reference_fabs(yy)) + { *n = (xx == yy) ? 1 : -1; - return reference_signbit( xx ) ? -0.0 : 0.0; + return reference_signbit(xx) ? -0.0 : 0.0; } - int signx = reference_signbit( xx ) ? -1 : 1; - int signy = reference_signbit( yy ) ? -1 : 1; + int signx = reference_signbit(xx) ? -1 : 1; + int signy = reference_signbit(yy) ? -1 : 1; int signn = (signx == signy) ? 
1 : -1; double x = reference_fabs(xx); double y = reference_fabs(yy); int ex, ey; - ex = reference_ilogbl( x ); - ey = reference_ilogbl( y ); + ex = reference_ilogbl(x); + ey = reference_ilogbl(y); double xr = x; double yr = y; uint32_t q = 0; - if(ex-ey >= -1) { - - yr = reference_ldexp( y, -ey ); - xr = reference_ldexp( x, -ex ); + if (ex - ey >= -1) + { + yr = reference_ldexp(y, -ey); + xr = reference_ldexp(x, -ex); int i; - if(ex-ey >= 0) { - - for(i = ex-ey; i > 0; i--) { + if (ex - ey >= 0) + { + for (i = ex - ey; i > 0; i--) + { q <<= 1; - if(xr >= yr) { + if (xr >= yr) + { xr -= yr; q += 1; } xr += xr; } q <<= 1; - if( xr > yr ) { + if (xr > yr) + { xr -= yr; q += 1; } } else - xr = reference_ldexp(xr, ex-ey); + xr = reference_ldexp(xr, ex - ey); } - if( (yr < 2.0*xr) || ( (yr == 2.0*xr) && (q & 0x00000001) ) ) { + if ((yr < 2.0 * xr) || ((yr == 2.0 * xr) && (q & 0x00000001))) + { xr -= yr; q += 1; } - if(ex-ey >= -1) - xr = reference_ldexp(xr, ey); + if (ex - ey >= -1) xr = reference_ldexp(xr, ey); int qout = q & 0x0000007f; - if( signn < 0) - qout = -qout; - if( xx < 0.0 ) - xr = -xr; + if (signn < 0) qout = -qout; + if (xx < 0.0) xr = -xr; *n = qout; return xr; @@ -4662,27 +4923,27 @@ long double reference_remquol(long double xd, long double yd, int *n) static double reference_scalbn(double x, int n) { - if(reference_isinf(x) || reference_isnan(x) || x == 0.0) - return x; + if (reference_isinf(x) || reference_isnan(x) || x == 0.0) return x; int bias = 1023; - union { double d; cl_long l; } u; - u.d = (double) x; + union { + double d; + cl_long l; + } u; + u.d = (double)x; int e = (int)((u.l & 0x7ff0000000000000LL) >> 52); - if(e == 0) + if (e == 0) { u.l |= ((cl_long)1023 << 52); u.d -= 1.0; e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; } e += n; - if(e >= 2047 || n >= 2098 ) - return reference_copysign(INFINITY, x); - if(e < -51 || n <-2097 ) - return reference_copysign(0.0, x); - if(e <= 0) + if (e >= 2047 || n >= 2098) return 
reference_copysign(INFINITY, x); + if (e < -51 || n < -2097) return reference_copysign(0.0, x); + if (e <= 0) { - bias += (e-1); + bias += (e - 1); e = 1; } u.l &= 0x800fffffffffffffLL; @@ -4695,26 +4956,26 @@ static double reference_scalbn(double x, int n) static long double reference_scalblnl(long double x, long n) { #if defined(__i386__) || defined(__x86_64__) // INTEL - union - { + union { long double d; - struct{ cl_ulong m; cl_ushort sexp;}u; - }u; + struct + { + cl_ulong m; + cl_ushort sexp; + } u; + } u; u.u.m = CL_LONG_MIN; - if ( reference_isinf(x) ) - return x; + if (reference_isinf(x)) return x; - if( x == 0.0L || n < -2200) - return reference_copysignl( 0.0L, x ); + if (x == 0.0L || n < -2200) return reference_copysignl(0.0L, x); - if( n > 2200 ) - return reference_copysignl( INFINITY, x ); + if (n > 2200) return reference_copysignl(INFINITY, x); - if( n < 0 ) + if (n < 0) { u.u.sexp = 0x3fff - 1022; - while( n <= -1022 ) + while (n <= -1022) { x *= u.d; n += 1022; @@ -4724,10 +4985,10 @@ static long double reference_scalblnl(long double x, long n) return x; } - if( n > 0 ) + if (n > 0) { u.u.sexp = 0x3fff + 1023; - while( n >= 1023 ) + while (n >= 1023) { x *= u.d; n -= 1023; @@ -4742,27 +5003,27 @@ static long double reference_scalblnl(long double x, long n) #elif defined(__arm__) // ARM .. 
sizeof(long double) == sizeof(double) #if __DBL_MAX_EXP__ >= __LDBL_MAX_EXP__ - if(reference_isinfl(x) || reference_isnanl(x)) - return x; + if (reference_isinfl(x) || reference_isnanl(x)) return x; int bias = 1023; - union { double d; cl_long l; } u; - u.d = (double) x; + union { + double d; + cl_long l; + } u; + u.d = (double)x; int e = (int)((u.l & 0x7ff0000000000000LL) >> 52); - if(e == 0) + if (e == 0) { u.l |= ((cl_long)1023 << 52); u.d -= 1.0; e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; } e += n; - if(e >= 2047) - return reference_copysignl(INFINITY, x); - if(e < -51) - return reference_copysignl(0.0, x); - if(e <= 0) + if (e >= 2047) return reference_copysignl(INFINITY, x); + if (e < -51) return reference_copysignl(0.0, x); + if (e <= 0) { - bias += (e-1); + bias += (e - 1); e = 1; } u.l &= 0x800fffffffffffffLL; @@ -4772,284 +5033,255 @@ static long double reference_scalblnl(long double x, long n) return x * u.d; #endif -#else // PPC +#else // PPC return scalblnl(x, n); #endif } -double reference_relaxed_exp( double x ) -{ - return reference_exp(x); -} +double reference_relaxed_exp(double x) { return reference_exp(x); } double reference_exp(double x) { - return reference_exp2( x * HEX_DBL( +, 1, 71547652b82fe, +, 0 ) ); + return reference_exp2(x * HEX_DBL(+, 1, 71547652b82fe, +, 0)); } long double reference_expl(long double x) { #if defined(__PPC__) - long double scale, bias; - - // The PPC double long version of expl fails to produce denorm results - // and instead generates a 0.0. 
Compensate for this limitation by - // computing expl as: - // expl(x + 40) * expl(-40) - // Likewise, overflows can prematurely produce an infinity, so we - // compute expl as: - // expl(x - 40) * expl(40) - scale = 1.0L; - bias = 0.0L; - if (x < -708.0L) { - bias = 40.0; - scale = expl(-40.0L); - } else if (x > 708.0L) { - bias = -40.0L; - scale = expl(40.0L); - } - return expl(x + bias) * scale; + long double scale, bias; + + // The PPC double long version of expl fails to produce denorm results + // and instead generates a 0.0. Compensate for this limitation by + // computing expl as: + // expl(x + 40) * expl(-40) + // Likewise, overflows can prematurely produce an infinity, so we + // compute expl as: + // expl(x - 40) * expl(40) + scale = 1.0L; + bias = 0.0L; + if (x < -708.0L) + { + bias = 40.0; + scale = expl(-40.0L); + } + else if (x > 708.0L) + { + bias = -40.0L; + scale = expl(40.0L); + } + return expl(x + bias) * scale; #else - return expl( x ); + return expl(x); #endif } -double reference_sinh(double x) -{ - return sinh(x); -} +double reference_sinh(double x) { return sinh(x); } -long double reference_sinhl(long double x) -{ - return sinhl(x); -} +long double reference_sinhl(long double x) { return sinhl(x); } double reference_fmod(double x, double y) { - if( x == 0.0 && fabs(y) > 0.0 ) - return x; + if (x == 0.0 && fabs(y) > 0.0) return x; - if( fabs(x) == INFINITY || y == 0 ) - return cl_make_nan(); + if (fabs(x) == INFINITY || y == 0) return cl_make_nan(); - if( fabs(y) == INFINITY ) // we know x is finite from above + if (fabs(y) == INFINITY) // we know x is finite from above return x; #if defined(_MSC_VER) && defined(_M_X64) - return fmod( x, y ); + return fmod(x, y); #else - return fmodf( (float) x, (float) y ); + return fmodf((float)x, (float)y); #endif } long double reference_fmodl(long double x, long double y) { - if( x == 0.0L && fabsl(y) > 0.0L ) - return x; + if (x == 0.0L && fabsl(y) > 0.0L) return x; - if( fabsl(x) == INFINITY || y == 
0.0L ) - return cl_make_nan(); + if (fabsl(x) == INFINITY || y == 0.0L) return cl_make_nan(); - if( fabsl(y) == INFINITY ) // we know x is finite from above + if (fabsl(y) == INFINITY) // we know x is finite from above return x; - return fmod( (double) x, (double) y ); + return fmod((double)x, (double)y); } double reference_modf(double x, double *n) { - if(isnan(x)) { + if (isnan(x)) + { *n = cl_make_nan(); return cl_make_nan(); } float nr; - float yr = modff((float) x, &nr); + float yr = modff((float)x, &nr); *n = nr; return yr; } long double reference_modfl(long double x, long double *n) { - if(isnan(x)) { + if (isnan(x)) + { *n = cl_make_nan(); return cl_make_nan(); } double nr; - double yr = modf((double) x, &nr); + double yr = modf((double)x, &nr); *n = nr; return yr; } -long double reference_fractl(long double x, long double *ip ) +long double reference_fractl(long double x, long double *ip) { - if(isnan(x)) { + if (isnan(x)) + { *ip = cl_make_nan(); return cl_make_nan(); } double i; - double f = modf((double) x, &i ); - if( f < 0.0 ) + double f = modf((double)x, &i); + if (f < 0.0) { f = 1.0 + f; i -= 1.0; - if( f == 1.0 ) - f = HEX_DBL( +, 1, fffffffffffff, -, 1 ); + if (f == 1.0) f = HEX_DBL(+, 1, fffffffffffff, -, 1); } *ip = i; return f; } -long double reference_fabsl(long double x) -{ - return fabsl( x ); -} +long double reference_fabsl(long double x) { return fabsl(x); } -double reference_relaxed_log( double x ) +double reference_relaxed_log(double x) { - return (float)reference_log((float)x); + return (float)reference_log((float)x); } double reference_log(double x) { - if( x == 0.0 ) - return -INFINITY; + if (x == 0.0) return -INFINITY; - if( x < 0.0 ) - return cl_make_nan(); + if (x < 0.0) return cl_make_nan(); - if( isinf(x) ) - return INFINITY; + if (isinf(x)) return INFINITY; - double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ); + double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1); double logxHi, logxLo; __log2_ep(&logxHi, &logxLo, x); - return 
logxHi*log2Hi; + return logxHi * log2Hi; } long double reference_logl(long double x) { - if( x == 0.0 ) - return -INFINITY; + if (x == 0.0) return -INFINITY; - if( x < 0.0 ) - return cl_make_nan(); + if (x < 0.0) return cl_make_nan(); - if( isinf(x) ) - return INFINITY; + if (isinf(x)) return INFINITY; - double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ); - double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 ); + double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1); + double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56); double logxHi, logxLo; __log2_ep(&logxHi, &logxLo, x); - //double rhi, rlo; - //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo); - //return (long double) rhi + (long double) rlo; - - long double lg2 = (long double) log2Hi + (long double) log2Lo; - long double logx = (long double) logxHi + (long double) logxLo; - return logx*lg2; + long double lg2 = (long double)log2Hi + (long double)log2Lo; + long double logx = (long double)logxHi + (long double)logxLo; + return logx * lg2; } -double reference_relaxed_pow( double x, double y) { - return (float)reference_exp2( ((float)y) * (float)reference_log2((float)x)); +double reference_relaxed_pow(double x, double y) +{ + return (float)reference_exp2(((float)y) * (float)reference_log2((float)x)); } -double reference_pow( double x, double y ) +double reference_pow(double x, double y) { - static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 ); + static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53); - //if x = 1, return x for any y, even NaN - if( x == 1.0 ) - return x; + // if x = 1, return x for any y, even NaN + if (x == 1.0) return x; - //if y == 0, return 1 for any x, even NaN - if( y == 0.0 ) - return 1.0; + // if y == 0, return 1 for any x, even NaN + if (y == 0.0) return 1.0; - //get NaNs out of the way - if( x != x || y != y ) - return x + y; + // get NaNs out of the way + if (x != x || y != y) return x + y; - //do the work required to sort out edge cases - double fabsy = reference_fabs( y ); - 
double fabsx = reference_fabs( x ); - double iy = reference_rint( fabsy ); //we do round to nearest here so that |fy| <= 0.5 - if( iy > fabsy )//convert nearbyint to floor + // do the work required to sort out edge cases + double fabsy = reference_fabs(y); + double fabsx = reference_fabs(x); + double iy = reference_rint( + fabsy); // we do round to nearest here so that |fy| <= 0.5 + if (iy > fabsy) // convert nearbyint to floor iy -= 1.0; int isOddInt = 0; - if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon ) - isOddInt = (int) (iy - 2.0 * rint( 0.5 * iy )); //might be 0, -1, or 1 + if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon) + isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1 - ///test a few more edge cases - //deal with x == 0 cases - if( x == 0.0 ) + /// test a few more edge cases + // deal with x == 0 cases + if (x == 0.0) { - if( ! isOddInt ) - x = 0.0; + if (!isOddInt) x = 0.0; - if( y < 0 ) - x = 1.0/ x; + if (y < 0) x = 1.0 / x; return x; } - //x == +-Inf cases - if( isinf(fabsx) ) + // x == +-Inf cases + if (isinf(fabsx)) { - if( x < 0 ) + if (x < 0) { - if( isOddInt ) + if (isOddInt) { - if( y < 0 ) + if (y < 0) return -0.0; else return -INFINITY; } else { - if( y < 0 ) + if (y < 0) return 0.0; else return INFINITY; } } - if( y < 0 ) - return 0; + if (y < 0) return 0; return INFINITY; } - //y = +-inf cases - if( isinf(fabsy) ) + // y = +-inf cases + if (isinf(fabsy)) { - if( x == -1 ) - return 1; + if (x == -1) return 1; - if( y < 0 ) + if (y < 0) { - if( fabsx < 1 ) - return INFINITY; + if (fabsx < 1) return INFINITY; return 0; } - if( fabsx < 1 ) - return 0; + if (fabsx < 1) return 0; return INFINITY; } // x < 0 and y non integer case - if( x < 0 && iy != fabsy ) + if (x < 0 && iy != fabsy) { - //return nan; + // return nan; return cl_make_nan(); } - //speedy resolution of sqrt and reciprocal sqrt - if( fabsy == 0.5 ) + // speedy resolution of sqrt and reciprocal sqrt + if (fabsy == 0.5) { - long 
double xl = reference_sqrt( x ); - if( y < 0 ) - xl = 1.0/ xl; + long double xl = reference_sqrt(x); + if (y < 0) xl = 1.0 / xl; return xl; } @@ -5060,73 +5292,55 @@ double reference_pow( double x, double y ) return isOddInt ? reference_copysignd(result, x) : result; } -double reference_sqrt(double x) -{ - return sqrt(x); -} +double reference_sqrt(double x) { return sqrt(x); } -double reference_floor(double x) -{ - return floorf((float) x); -} +double reference_floor(double x) { return floorf((float)x); } double reference_ldexp(double value, int exponent) { #ifdef __MINGW32__ -/* - * ==================================================== - * This function is from fdlibm: http://www.netlib.org - * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - if(!finite(value)||value==0.0) return value; - return scalbn(value,exponent); + /* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + if (!finite(value) || value == 0.0) return value; + return scalbn(value, exponent); #else return reference_scalbn(value, exponent); #endif } -long double reference_ldexpl(long double x, int n) -{ - return ldexpl( x, n); -} +long double reference_ldexpl(long double x, int n) { return ldexpl(x, n); } -long double reference_coshl(long double x) -{ - return coshl(x); -} +long double reference_coshl(long double x) { return coshl(x); } -double reference_ceil(double x) -{ - return ceilf((float) x); -} +double reference_ceil(double x) { return ceilf((float)x); } long double reference_ceill(long double x) { - if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) ) - return x; + if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x; long double absx = reference_fabsl(x); - if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) ) - return x; + if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x; - if( absx < 1.0 ) + if (absx < 1.0) { - if( x < 0.0 ) + if (x < 0.0) return 0.0; else return 1.0; } - long double r = (long double) ((cl_long) x); + long double r = (long double)((cl_long)x); - if( x > 0.0 && r < x ) - r += 1.0; + if (x > 0.0 && r < x) r += 1.0; return r; } @@ -5137,45 +5351,53 @@ long double reference_acosl(long double x) long double x2 = x * x; int i; - //Prepare a head + tail representation of PI in long double. A good compiler should get rid of all of this work. - static const cl_ulong pi_bits[2] = { 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL}; // first 126 bits of pi http://www.super-computing.org/pi-hexa_current.html + // Prepare a head + tail representation of PI in long double. A good + // compiler should get rid of all of this work. 
+ static const cl_ulong pi_bits[2] = { + 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL + }; // first 126 bits of pi + // http://www.super-computing.org/pi-hexa_current.html long double head, tail, temp; #if __LDBL_MANT_DIG__ >= 64 // long double has 64-bits of precision or greater - temp = (long double) pi_bits[0] * 0x1.0p64L; - head = temp + (long double) pi_bits[1]; - temp -= head; // rounding err rounding pi_bits[1] into head - tail = (long double) pi_bits[1] + temp; - head *= HEX_LDBL( +, 1, 0, -, 125 ); - tail *= HEX_LDBL( +, 1, 0, -, 125 ); + temp = (long double)pi_bits[0] * 0x1.0p64L; + head = temp + (long double)pi_bits[1]; + temp -= head; // rounding err rounding pi_bits[1] into head + tail = (long double)pi_bits[1] + temp; + head *= HEX_LDBL(+, 1, 0, -, 125); + tail *= HEX_LDBL(+, 1, 0, -, 125); #else - head = (long double) pi_bits[0]; - tail = (long double) ((cl_long) pi_bits[0] - (cl_long) head ); // residual part of pi_bits[0] after rounding - tail = tail * HEX_LDBL( +, 1, 0, +, 64 ) + (long double) pi_bits[1]; - head *= HEX_LDBL( +, 1, 0, -, 61 ); - tail *= HEX_LDBL( +, 1, 0, -, 125 ); + head = (long double)pi_bits[0]; + tail = + (long double)((cl_long)pi_bits[0] + - (cl_long) + head); // residual part of pi_bits[0] after rounding + tail = tail * HEX_LDBL(+, 1, 0, +, 64) + (long double)pi_bits[1]; + head *= HEX_LDBL(+, 1, 0, -, 61); + tail *= HEX_LDBL(+, 1, 0, -, 125); #endif // oversize values and NaNs go to NaN - if( ! (x2 <= 1.0) ) - return sqrtl(1.0L - x2 ); + if (!(x2 <= 1.0)) return sqrtl(1.0L - x2); // // deal with large |x|: // sqrt( 1 - x**2) - // acos(|x| > sqrt(0.5)) = 2 * atan( z ); z = -------------------- ; z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...] + // acos(|x| > sqrt(0.5)) = 2 * atan( z ); z = -------------------- ; + // z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...] 
// 1 + x - if( x2 > 0.5 ) + if (x2 > 0.5) { // we handle the x < 0 case as pi - acos(|x|) - long double sign = reference_copysignl( 1.0L, x ); - long double fabsx = reference_fabsl( x ); - head -= head * sign; // x > 0 ? 0 : pi.hi - tail -= tail * sign; // x > 0 ? 0 : pi.low + long double sign = reference_copysignl(1.0L, x); + long double fabsx = reference_fabsl(x); + head -= head * sign; // x > 0 ? 0 : pi.hi + tail -= tail * sign; // x > 0 ? 0 : pi.low - // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt( (1-x)/(1+x) ) - long double z2 = (1.0L - fabsx) / (1.0L + fabsx); // z**2 + // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt( + // (1-x)/(1+x) ) + long double z2 = (1.0L - fabsx) / (1.0L + fabsx); // z**2 long double z = sign * sqrtl(z2); // atan(sqrt(q)) @@ -5185,29 +5407,41 @@ long double reference_acosl(long double x) // Define q = r*r, and solve for atan(r): // // atan(r) = (p(r) + 1) * r = rp(r) + r - static long double atan_coeffs[] = { HEX_LDBL( -, b, 3f52e0c278293b3, -, 67 ), HEX_LDBL( -, a, aaaaaaaaaaa95b8, -, 5 ), - HEX_LDBL( +, c, ccccccccc992407, -, 6 ), HEX_LDBL( -, 9, 24924923024398, -, 6 ), - HEX_LDBL( +, e, 38e38d6f92c98f3, -, 7 ), HEX_LDBL( -, b, a2e89bfb8393ec6, -, 7 ), - HEX_LDBL( +, 9, d89a9f574d412cb, -, 7 ), HEX_LDBL( -, 8, 88580517884c547, -, 7 ), - HEX_LDBL( +, f, 0ab6756abdad408, -, 8 ), HEX_LDBL( -, d, 56a5b07a2f15b49, -, 8 ), - HEX_LDBL( +, b, 72ab587e46d80b2, -, 8 ), HEX_LDBL( -, 8, 62ea24bb5b2e636, -, 8 ), - HEX_LDBL( +, e, d67c16582123937, -, 10 ) }; // minimax fit over [ 0x1.0p-52, 0.18] Max error: 0x1.67ea5c184e5d9p-64 + static long double atan_coeffs[] = { + HEX_LDBL(-, b, 3f52e0c278293b3, -, 67), + HEX_LDBL(-, a, aaaaaaaaaaa95b8, -, 5), + HEX_LDBL(+, c, ccccccccc992407, -, 6), + HEX_LDBL(-, 9, 24924923024398, -, 6), + HEX_LDBL(+, e, 38e38d6f92c98f3, -, 7), + HEX_LDBL(-, b, a2e89bfb8393ec6, -, 7), + HEX_LDBL(+, 9, d89a9f574d412cb, -, 7), + HEX_LDBL(-, 8, 88580517884c547, -, 7), + HEX_LDBL(+, 
f, 0ab6756abdad408, -, 8), + HEX_LDBL(-, d, 56a5b07a2f15b49, -, 8), + HEX_LDBL(+, b, 72ab587e46d80b2, -, 8), + HEX_LDBL(-, 8, 62ea24bb5b2e636, -, 8), + HEX_LDBL(+, e, d67c16582123937, -, 10) + }; // minimax fit over [ 0x1.0p-52, 0.18] Max error: + // 0x1.67ea5c184e5d9p-64 // Calculate y = p(r) - const size_t atan_coeff_count = sizeof( atan_coeffs ) / sizeof( atan_coeffs[0] ); - long double y = atan_coeffs[ atan_coeff_count - 1]; - for( i = (int)atan_coeff_count - 2; i >= 0; i-- ) + const size_t atan_coeff_count = + sizeof(atan_coeffs) / sizeof(atan_coeffs[0]); + long double y = atan_coeffs[atan_coeff_count - 1]; + for (i = (int)atan_coeff_count - 2; i >= 0; i--) y = atan_coeffs[i] + y * z2; - z *= 2.0L; // fold in 2.0 for 2.0 * atan(z) - y *= z; // rp(r) + z *= 2.0L; // fold in 2.0 for 2.0 * atan(z) + y *= z; // rp(r) return head + ((y + tail) + z); } // do |x| <= sqrt(0.5) here - // acos( sqrt(z) ) - PI/2 - // Piecewise minimax polynomial fits for p(z) = 1 + ------------------------; + // acos( sqrt(z) ) - + // PI/2 + // Piecewise minimax polynomial fits for p(z) = 1 + + // ------------------------; // sqrt(z) // // Define z = x*x, and solve for acos(x) over x in x >= 0: @@ -5215,52 +5449,88 @@ long double reference_acosl(long double x) // acos( sqrt(z) ) = acos(x) = x*(p(z)-1) + PI/2 = xp(x**2) - x + PI/2 // const long double coeffs[4][14] = { - { HEX_LDBL( -, a, fa7382e1f347974, -, 10 ), HEX_LDBL( -, b, 4d5a992de1ac4da, -, 6 ), - HEX_LDBL( -, a, c526184bd558c17, -, 7 ), HEX_LDBL( -, d, 9ed9b0346ec092a, -, 8 ), - HEX_LDBL( -, 9, dca410c1f04b1f, -, 8 ), HEX_LDBL( -, f, 76e411ba9581ee5, -, 9 ), - HEX_LDBL( -, c, c71b00479541d8e, -, 9 ), HEX_LDBL( -, a, f527a3f9745c9de, -, 9 ), - HEX_LDBL( -, 9, a93060051f48d14, -, 9 ), HEX_LDBL( -, 8, b3d39ad70e06021, -, 9 ), - HEX_LDBL( -, f, f2ab95ab84f79c, -, 10 ), HEX_LDBL( -, e, d1af5f5301ccfe4, -, 10 ), - HEX_LDBL( -, e, 1b53ba562f0f74a, -, 10 ), HEX_LDBL( -, d, 6a3851330e15526, -, 10 ) }, // x - 0.0625 in [ 
-0x1.fffffffffp-5, 0x1.0p-4 ] Error: 0x1.97839bf07024p-76 - - { HEX_LDBL( -, 8, c2f1d638e4c1b48, -, 8 ), HEX_LDBL( -, c, d47ac903c311c2c, -, 6 ), - HEX_LDBL( -, d, e020b2dabd5606a, -, 7 ), HEX_LDBL( -, a, 086fafac220f16b, -, 7 ), - HEX_LDBL( -, 8, 55b5efaf6b86c3e, -, 7 ), HEX_LDBL( -, f, 05c9774fed2f571, -, 8 ), - HEX_LDBL( -, e, 484a93f7f0fc772, -, 8 ), HEX_LDBL( -, e, 1a32baef01626e4, -, 8 ), - HEX_LDBL( -, e, 528e525b5c9c73d, -, 8 ), HEX_LDBL( -, e, ddd5d27ad49b2c8, -, 8 ), - HEX_LDBL( -, f, b3259e7ae10c6f, -, 8 ), HEX_LDBL( -, 8, 68998170d5b19b7, -, 7 ), - HEX_LDBL( -, 9, 4468907f007727, -, 7 ), HEX_LDBL( -, a, 2ad5e4906a8e7b3, -, 7 ) },// x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ] Error: 0x1.647af70073457p-73 - - { HEX_LDBL( -, f, a76585ad399e7ac, -, 8 ), HEX_LDBL( -, e, d665b7dd504ca7c, -, 6 ), - HEX_LDBL( -, 9, 4c7c2402bd4bc33, -, 6 ), HEX_LDBL( -, f, ba76b69074ff71c, -, 7 ), - HEX_LDBL( -, f, 58117784bdb6d5f, -, 7 ), HEX_LDBL( -, 8, 22ddd8eef53227d, -, 6 ), - HEX_LDBL( -, 9, 1d1d3b57a63cdb4, -, 6 ), HEX_LDBL( -, a, 9c4bdc40cca848, -, 6 ), - HEX_LDBL( -, c, b673b12794edb24, -, 6 ), HEX_LDBL( -, f, 9290a06e31575bf, -, 6 ), - HEX_LDBL( -, 9, b4929c16aeb3d1f, -, 5 ), HEX_LDBL( -, c, 461e725765a7581, -, 5 ), - HEX_LDBL( -, 8, 0a59654c98d9207, -, 4 ), HEX_LDBL( -, a, 6de6cbd96c80562, -, 4 ) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ] Error: 0x1.b0246c304ce1ap-70 - - { HEX_LDBL( -, b, dca8b0359f96342, -, 7 ), HEX_LDBL( -, 8, cd2522fcde9823, -, 5 ), - HEX_LDBL( -, d, 2af9397b27ff74d, -, 6 ), HEX_LDBL( -, d, 723f2c2c2409811, -, 6 ), - HEX_LDBL( -, f, ea8f8481ecc3cd1, -, 6 ), HEX_LDBL( -, a, 43fd8a7a646b0b2, -, 5 ), - HEX_LDBL( -, e, 01b0bf63a4e8d76, -, 5 ), HEX_LDBL( -, 9, f0b7096a2a7b4d, -, 4 ), - HEX_LDBL( -, e, 872e7c5a627ab4c, -, 4 ), HEX_LDBL( -, a, dbd760a1882da48, -, 3 ), - HEX_LDBL( -, 8, 424e4dea31dd273, -, 2 ), HEX_LDBL( -, c, c05d7730963e793, -, 2 ), - HEX_LDBL( -, a, 523d97197cd124a, -, 1 ), HEX_LDBL( -, 8, 307ba943978aaee, +, 0 ) } // x - 0.4375 in [ 
-0x1.0p-4, 0x1.0p-4 ] Error: 0x1.9ecff73da69c9p-66 - }; + { HEX_LDBL(-, a, fa7382e1f347974, -, 10), + HEX_LDBL(-, b, 4d5a992de1ac4da, -, 6), + HEX_LDBL(-, a, c526184bd558c17, -, 7), + HEX_LDBL(-, d, 9ed9b0346ec092a, -, 8), + HEX_LDBL(-, 9, dca410c1f04b1f, -, 8), + HEX_LDBL(-, f, 76e411ba9581ee5, -, 9), + HEX_LDBL(-, c, c71b00479541d8e, -, 9), + HEX_LDBL(-, a, f527a3f9745c9de, -, 9), + HEX_LDBL(-, 9, a93060051f48d14, -, 9), + HEX_LDBL(-, 8, b3d39ad70e06021, -, 9), + HEX_LDBL(-, f, f2ab95ab84f79c, -, 10), + HEX_LDBL(-, e, d1af5f5301ccfe4, -, 10), + HEX_LDBL(-, e, 1b53ba562f0f74a, -, 10), + HEX_LDBL(-, d, 6a3851330e15526, -, + 10) }, // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ] + // Error: 0x1.97839bf07024p-76 + + { HEX_LDBL(-, 8, c2f1d638e4c1b48, -, 8), + HEX_LDBL(-, c, d47ac903c311c2c, -, 6), + HEX_LDBL(-, d, e020b2dabd5606a, -, 7), + HEX_LDBL(-, a, 086fafac220f16b, -, 7), + HEX_LDBL(-, 8, 55b5efaf6b86c3e, -, 7), + HEX_LDBL(-, f, 05c9774fed2f571, -, 8), + HEX_LDBL(-, e, 484a93f7f0fc772, -, 8), + HEX_LDBL(-, e, 1a32baef01626e4, -, 8), + HEX_LDBL(-, e, 528e525b5c9c73d, -, 8), + HEX_LDBL(-, e, ddd5d27ad49b2c8, -, 8), + HEX_LDBL(-, f, b3259e7ae10c6f, -, 8), + HEX_LDBL(-, 8, 68998170d5b19b7, -, 7), + HEX_LDBL(-, 9, 4468907f007727, -, 7), + HEX_LDBL(-, a, 2ad5e4906a8e7b3, -, + 7) }, // x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ] Error: + // 0x1.647af70073457p-73 + + { HEX_LDBL(-, f, a76585ad399e7ac, -, 8), + HEX_LDBL(-, e, d665b7dd504ca7c, -, 6), + HEX_LDBL(-, 9, 4c7c2402bd4bc33, -, 6), + HEX_LDBL(-, f, ba76b69074ff71c, -, 7), + HEX_LDBL(-, f, 58117784bdb6d5f, -, 7), + HEX_LDBL(-, 8, 22ddd8eef53227d, -, 6), + HEX_LDBL(-, 9, 1d1d3b57a63cdb4, -, 6), + HEX_LDBL(-, a, 9c4bdc40cca848, -, 6), + HEX_LDBL(-, c, b673b12794edb24, -, 6), + HEX_LDBL(-, f, 9290a06e31575bf, -, 6), + HEX_LDBL(-, 9, b4929c16aeb3d1f, -, 5), + HEX_LDBL(-, c, 461e725765a7581, -, 5), + HEX_LDBL(-, 8, 0a59654c98d9207, -, 4), + HEX_LDBL(-, a, 6de6cbd96c80562, -, + 4) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ] 
Error: + // 0x1.b0246c304ce1ap-70 + + { HEX_LDBL(-, b, dca8b0359f96342, -, 7), + HEX_LDBL(-, 8, cd2522fcde9823, -, 5), + HEX_LDBL(-, d, 2af9397b27ff74d, -, 6), + HEX_LDBL(-, d, 723f2c2c2409811, -, 6), + HEX_LDBL(-, f, ea8f8481ecc3cd1, -, 6), + HEX_LDBL(-, a, 43fd8a7a646b0b2, -, 5), + HEX_LDBL(-, e, 01b0bf63a4e8d76, -, 5), + HEX_LDBL(-, 9, f0b7096a2a7b4d, -, 4), + HEX_LDBL(-, e, 872e7c5a627ab4c, -, 4), + HEX_LDBL(-, a, dbd760a1882da48, -, 3), + HEX_LDBL(-, 8, 424e4dea31dd273, -, 2), + HEX_LDBL(-, c, c05d7730963e793, -, 2), + HEX_LDBL(-, a, 523d97197cd124a, -, 1), + HEX_LDBL(-, 8, 307ba943978aaee, +, + 0) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ] Error: + // 0x1.9ecff73da69c9p-66 + }; const long double offsets[4] = { 0.0625, 0.1875, 0.3125, 0.4375 }; - const size_t coeff_count = sizeof( coeffs[0] ) / sizeof( coeffs[0][0] ); + const size_t coeff_count = sizeof(coeffs[0]) / sizeof(coeffs[0][0]); - // reduce the incoming values a bit so that they are in the range [-0x1.0p-4, 0x1.0p-4] + // reduce the incoming values a bit so that they are in the range + // [-0x1.0p-4, 0x1.0p-4] const long double *c; i = x2 * 8.0L; c = coeffs[i]; - x2 -= offsets[i]; // exact + x2 -= offsets[i]; // exact // calcualte p(x2) - long double y = c[ coeff_count - 1]; - for( i = (int)coeff_count - 2; i >= 0; i-- ) - y = c[i] + y * x2; + long double y = c[coeff_count - 1]; + for (i = (int)coeff_count - 2; i >= 0; i--) y = c[i] + y * x2; // xp(x2) y *= x; @@ -5273,58 +5543,46 @@ double reference_relaxed_acos(double x) { return reference_acos(x); } double reference_log10(double x) { - if( x == 0.0 ) - return -INFINITY; + if (x == 0.0) return -INFINITY; - if( x < 0.0 ) - return cl_make_nan(); + if (x < 0.0) return cl_make_nan(); - if( isinf(x) ) - return INFINITY; + if (isinf(x)) return INFINITY; - double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 ); + double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2); double logxHi, logxLo; __log2_ep(&logxHi, &logxLo, x); - return logxHi*log2Hi; + return logxHi 
* log2Hi; } double reference_relaxed_log10(double x) { return reference_log10(x); } long double reference_log10l(long double x) { - if( x == 0.0 ) - return -INFINITY; + if (x == 0.0) return -INFINITY; - if( x < 0.0 ) - return cl_make_nan(); + if (x < 0.0) return cl_make_nan(); - if( isinf(x) ) - return INFINITY; + if (isinf(x)) return INFINITY; - double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 ); - double log2Lo = HEX_DBL( +, 1, e623e2566b02d, -, 55 ); + double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2); + double log2Lo = HEX_DBL(+, 1, e623e2566b02d, -, 55); double logxHi, logxLo; __log2_ep(&logxHi, &logxLo, x); - //double rhi, rlo; - //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo); - //return (long double) rhi + (long double) rlo; - - long double lg2 = (long double) log2Hi + (long double) log2Lo; - long double logx = (long double) logxHi + (long double) logxLo; - return logx*lg2; + long double lg2 = (long double)log2Hi + (long double)log2Lo; + long double logx = (long double)logxHi + (long double)logxLo; + return logx * lg2; } -double reference_acos(double x) -{ - return acos( x ); -} +double reference_acos(double x) { return acos(x); } double reference_atan2(double x, double y) { #if defined(_WIN32) // fix edge cases for Windows - if (isinf(x) && isinf(y)) { + if (isinf(x) && isinf(y)) + { double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4; return (x > 0) ? retval : -retval; } @@ -5336,7 +5594,8 @@ long double reference_atan2l(long double x, long double y) { #if defined(_WIN32) // fix edge cases for Windows - if (isinf(x) && isinf(y)) { + if (isinf(x) && isinf(y)) + { long double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4; return (x > 0) ? 
retval : -retval; } @@ -5346,7 +5605,7 @@ long double reference_atan2l(long double x, long double y) double reference_frexp(double a, int *exp) { - if(isnan(a) || isinf(a) || a == 0.0) + if (isnan(a) || isinf(a) || a == 0.0) { *exp = 0; return a; @@ -5364,7 +5623,7 @@ double reference_frexp(double a, int *exp) u.l &= 0x7fffffffffffffffULL; int bias = -1022; - if((u.l & 0x7ff0000000000000ULL) == 0) + if ((u.l & 0x7ff0000000000000ULL) == 0) { double d = u.l; u.d = d; @@ -5383,13 +5642,13 @@ double reference_frexp(double a, int *exp) long double reference_frexpl(long double a, int *exp) { - if(isnan(a) || isinf(a) || a == 0.0) + if (isnan(a) || isinf(a) || a == 0.0) { *exp = 0; return a; } - if(sizeof(long double) == sizeof(double)) + if (sizeof(long double) == sizeof(double)) { return reference_frexp(a, exp); } @@ -5400,92 +5659,64 @@ long double reference_frexpl(long double a, int *exp) } -double reference_atan(double x) -{ - return atan( x ); -} +double reference_atan(double x) { return atan(x); } -long double reference_atanl(long double x) -{ - return atanl( x ); -} +long double reference_atanl(long double x) { return atanl(x); } -long double reference_asinl(long double x) -{ - return asinl( x ); -} +long double reference_asinl(long double x) { return asinl(x); } -double reference_asin(double x) -{ - return asin( x ); -} +double reference_asin(double x) { return asin(x); } double reference_relaxed_asin(double x) { return reference_asin(x); } -double reference_fabs(double x) -{ - return fabs( x); -} +double reference_fabs(double x) { return fabs(x); } -double reference_cosh(double x) -{ - return cosh( x ); -} +double reference_cosh(double x) { return cosh(x); } long double reference_sqrtl(long double x) { -#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) - __m128d result128 = _mm_set_sd((double) x); +#if defined(__SSE2__) \ + || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) + __m128d result128 = 
_mm_set_sd((double)x); result128 = _mm_sqrt_sd(result128, result128); return _mm_cvtsd_f64(result128); #else volatile double dx = x; - return sqrt( dx ); + return sqrt(dx); #endif } -long double reference_tanhl(long double x) -{ - return tanhl( x ); -} +long double reference_tanhl(long double x) { return tanhl(x); } long double reference_floorl(long double x) { - if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) ) - return x; + if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x; long double absx = reference_fabsl(x); - if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) ) - return x; + if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x; - if( absx < 1.0 ) + if (absx < 1.0) { - if( x < 0.0 ) + if (x < 0.0) return -1.0; else return 0.0; } - long double r = (long double) ((cl_long) x); + long double r = (long double)((cl_long)x); - if( x < 0.0 && r > x ) - r -= 1.0; + if (x < 0.0 && r > x) r -= 1.0; return r; } -double reference_tanh(double x) -{ - return tanh( x ); -} +double reference_tanh(double x) { return tanh(x); } -long double reference_assignmentl( long double x ){ return x; } +long double reference_assignmentl(long double x) { return x; } -int reference_notl( long double x ) +int reference_notl(long double x) { int r = !x; return r; } - - diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h index 7c751f68c5..78b245105e 100644 --- a/test_conformance/math_brute_force/reference_math.h +++ b/test_conformance/math_brute_force/reference_math.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -16,223 +16,221 @@ #ifndef REFERENCE_MATH_H #define REFERENCE_MATH_H -#if defined( __APPLE__ ) - #include +#if defined(__APPLE__) +#include #else - #include +#include #endif // -- for testing float -- -double reference_sinh( double x ); -double reference_sqrt( double x ); -double reference_tanh( double x ); -double reference_acos( double ); -double reference_asin( double ); -double reference_atan( double ); -double reference_atan2( double, double ); -double reference_ceil( double ); -double reference_cosh( double ); -double reference_exp( double ); -double reference_fabs( double ); -double reference_acospi( double ); -double reference_asinpi( double ); -double reference_atanpi( double ); -double reference_atan2pi( double, double ); -double reference_cospi( double ); -double reference_divide( double, double ); -double reference_fract( double, double * ); -float reference_fma( float, float, float, int ); -double reference_mad( double, double, double ); -double reference_nextafter(double, double ); -double reference_recip( double ); -double reference_rootn( double, int ); -double reference_rsqrt( double ); -double reference_sincos( double, double * ); -double reference_sinpi( double ); -double reference_tanpi( double ); +double reference_sinh(double x); +double reference_sqrt(double x); +double reference_tanh(double x); +double reference_acos(double); +double reference_asin(double); +double reference_atan(double); +double reference_atan2(double, double); +double reference_ceil(double); +double reference_cosh(double); +double reference_exp(double); +double reference_fabs(double); +double reference_acospi(double); +double reference_asinpi(double); +double reference_atanpi(double); +double reference_atan2pi(double, double); +double reference_cospi(double); +double reference_divide(double, double); +double reference_fract(double, double*); +float reference_fma(float, float, float, int); +double reference_mad(double, double, 
double); +double reference_nextafter(double, double); +double reference_recip(double); +double reference_rootn(double, int); +double reference_rsqrt(double); +double reference_sincos(double, double*); +double reference_sinpi(double); +double reference_tanpi(double); double reference_pow(double x, double y); -double reference_pown( double, int ); -double reference_powr( double, double ); -double reference_cos( double ); -double reference_sin( double ); -double reference_tan( double ); -double reference_log( double ); -double reference_log10( double ); -double reference_modf( double, double *n ); - -double reference_fdim( double, double ); -double reference_add( double, double ); -double reference_subtract( double, double ); -double reference_divide( double, double ); -double reference_multiply( double, double ); -double reference_remquo( double, double, int* ); -double reference_lgamma_r( double, int* ); - -int reference_isequal( double, double ); -int reference_isfinite( double ); -int reference_isgreater( double, double ); -int reference_isgreaterequal( double, double ); -int reference_isinf( double ); -int reference_isless( double, double ); -int reference_islessequal( double, double ); -int reference_islessgreater( double, double ); -int reference_isnan( double ); -int reference_isnormal( double ); -int reference_isnotequal( double, double ); -int reference_isordered( double, double ); -int reference_isunordered( double, double ); -int reference_signbit( float ); - -double reference_acosh( double x ); -double reference_asinh( double x ); -double reference_atanh( double x ); +double reference_pown(double, int); +double reference_powr(double, double); +double reference_cos(double); +double reference_sin(double); +double reference_tan(double); +double reference_log(double); +double reference_log10(double); +double reference_modf(double, double* n); + +double reference_fdim(double, double); +double reference_add(double, double); +double reference_subtract(double, 
double); +double reference_divide(double, double); +double reference_multiply(double, double); +double reference_remquo(double, double, int*); +double reference_lgamma_r(double, int*); + +int reference_isequal(double, double); +int reference_isfinite(double); +int reference_isgreater(double, double); +int reference_isgreaterequal(double, double); +int reference_isinf(double); +int reference_isless(double, double); +int reference_islessequal(double, double); +int reference_islessgreater(double, double); +int reference_isnan(double); +int reference_isnormal(double); +int reference_isnotequal(double, double); +int reference_isordered(double, double); +int reference_isunordered(double, double); +int reference_signbit(float); + +double reference_acosh(double x); +double reference_asinh(double x); +double reference_atanh(double x); double reference_cbrt(double x); -float reference_copysign( float x, float y); -double reference_copysignd( double x, double y); -double reference_exp10( double ); -double reference_exp2( double x ); -double reference_expm1( double x ); -double reference_fmax( double x, double y ); -double reference_fmin( double x, double y ); -double reference_hypot( double x, double y ); -double reference_lgamma( double x); -int reference_ilogb( double ); -double reference_log2( double x ); -double reference_log1p( double x ); -double reference_logb( double x ); -double reference_maxmag( double x, double y ); -double reference_minmag( double x, double y ); -double reference_nan( cl_uint x ); -double reference_reciprocal( double x ); -double reference_remainder( double x, double y ); -double reference_rint( double x ); -double reference_round( double x ); -double reference_trunc( double x ); -double reference_floor( double x ); -double reference_fmod( double x, double y ); -double reference_frexp( double x, int *n ); -double reference_ldexp( double x, int n ); - -double reference_assignment( double x ); -int reference_not( double x ); +float 
reference_copysign(float x, float y); +double reference_copysignd(double x, double y); +double reference_exp10(double); +double reference_exp2(double x); +double reference_expm1(double x); +double reference_fmax(double x, double y); +double reference_fmin(double x, double y); +double reference_hypot(double x, double y); +double reference_lgamma(double x); +int reference_ilogb(double); +double reference_log2(double x); +double reference_log1p(double x); +double reference_logb(double x); +double reference_maxmag(double x, double y); +double reference_minmag(double x, double y); +double reference_nan(cl_uint x); +double reference_reciprocal(double x); +double reference_remainder(double x, double y); +double reference_rint(double x); +double reference_round(double x); +double reference_trunc(double x); +double reference_floor(double x); +double reference_fmod(double x, double y); +double reference_frexp(double x, int* n); +double reference_ldexp(double x, int n); + +double reference_assignment(double x); +int reference_not(double x); // -- for testing fast-relaxed double reference_relaxed_acos(double); double reference_relaxed_asin(double); double reference_relaxed_atan(double); -double reference_relaxed_mad( double, double, double ); -double reference_relaxed_divide( double x, double y ); -double reference_relaxed_sin( double x ); +double reference_relaxed_mad(double, double, double); +double reference_relaxed_divide(double x, double y); +double reference_relaxed_sin(double x); double reference_relaxed_sinpi(double x); -double reference_relaxed_cos( double x ); +double reference_relaxed_cos(double x); double reference_relaxed_cospi(double x); -double reference_relaxed_sincos( double x, double * y); -double reference_relaxed_tan( double x ); -double reference_relaxed_exp( double x ); -double reference_relaxed_exp2( double x ); -double reference_relaxed_exp10( double x ); -double reference_relaxed_log( double x ); -double reference_relaxed_log2( double x ); +double 
reference_relaxed_sincos(double x, double* y); +double reference_relaxed_tan(double x); +double reference_relaxed_exp(double x); +double reference_relaxed_exp2(double x); +double reference_relaxed_exp10(double x); +double reference_relaxed_log(double x); +double reference_relaxed_log2(double x); double reference_relaxed_log10(double x); -double reference_relaxed_pow( double x, double y); -double reference_relaxed_reciprocal( double x ); +double reference_relaxed_pow(double x, double y); +double reference_relaxed_reciprocal(double x); // -- for testing double -- -long double reference_sinhl( long double x ); -long double reference_sqrtl( long double x ); -long double reference_tanhl( long double x ); -long double reference_acosl( long double ); -long double reference_asinl( long double ); -long double reference_atanl( long double ); -long double reference_atan2l( long double, long double ); -long double reference_ceill( long double ); -long double reference_coshl( long double ); -long double reference_expl( long double ); -long double reference_fabsl( long double ); -long double reference_acospil( long double ); -long double reference_asinpil( long double ); -long double reference_atanpil( long double ); -long double reference_atan2pil( long double, long double ); -long double reference_cospil( long double ); -long double reference_dividel( long double, long double ); -long double reference_fractl( long double, long double * ); -long double reference_fmal( long double, long double, long double ); -long double reference_madl( long double, long double, long double ); -long double reference_nextafterl(long double, long double ); -long double reference_recipl( long double ); -long double reference_rootnl( long double, int ); -long double reference_rsqrtl( long double ); -long double reference_sincosl( long double, long double * ); -long double reference_sinpil( long double ); -long double reference_tanpil( long double ); +long double reference_sinhl(long double x); 
+long double reference_sqrtl(long double x); +long double reference_tanhl(long double x); +long double reference_acosl(long double); +long double reference_asinl(long double); +long double reference_atanl(long double); +long double reference_atan2l(long double, long double); +long double reference_ceill(long double); +long double reference_coshl(long double); +long double reference_expl(long double); +long double reference_fabsl(long double); +long double reference_acospil(long double); +long double reference_asinpil(long double); +long double reference_atanpil(long double); +long double reference_atan2pil(long double, long double); +long double reference_cospil(long double); +long double reference_dividel(long double, long double); +long double reference_fractl(long double, long double*); +long double reference_fmal(long double, long double, long double); +long double reference_madl(long double, long double, long double); +long double reference_nextafterl(long double, long double); +long double reference_recipl(long double); +long double reference_rootnl(long double, int); +long double reference_rsqrtl(long double); +long double reference_sincosl(long double, long double*); +long double reference_sinpil(long double); +long double reference_tanpil(long double); long double reference_powl(long double x, long double y); -long double reference_pownl( long double, int ); -long double reference_powrl( long double, long double ); -long double reference_cosl( long double ); -long double reference_sinl(long double ); -long double reference_tanl( long double ); -long double reference_logl( long double ); -long double reference_log10l( long double ); -long double reference_modfl( long double, long double *n ); - - -long double reference_fdiml( long double, long double ); -long double reference_addl( long double, long double ); -long double reference_subtractl( long double, long double ); -long double reference_dividel( long double, long double ); -long double 
reference_multiplyl( long double, long double ); -long double reference_remquol( long double, long double, int* ); -long double reference_lgamma_rl( long double, int* ); - - -int reference_isequall( long double, long double ); -int reference_isfinitel( long double ); -int reference_isgreaterl( long double, long double ); -int reference_isgreaterequall( long double, long double ); -int reference_isinfl( long double ); -int reference_islessl( long double, long double ); -int reference_islessequall( long double, long double ); -int reference_islessgreaterl( long double, long double ); -int reference_isnanl( long double ); -int reference_isnormall( long double ); -int reference_isnotequall( long double, long double ); -int reference_isorderedl( long double, long double ); -int reference_isunorderedl( long double, long double ); -int reference_signbitl( long double ); - -long double reference_acoshl( long double x ); -long double reference_asinhl( long double x ); -long double reference_atanhl( long double x ); +long double reference_pownl(long double, int); +long double reference_powrl(long double, long double); +long double reference_cosl(long double); +long double reference_sinl(long double); +long double reference_tanl(long double); +long double reference_logl(long double); +long double reference_log10l(long double); +long double reference_modfl(long double, long double* n); + + +long double reference_fdiml(long double, long double); +long double reference_addl(long double, long double); +long double reference_subtractl(long double, long double); +long double reference_dividel(long double, long double); +long double reference_multiplyl(long double, long double); +long double reference_remquol(long double, long double, int*); +long double reference_lgamma_rl(long double, int*); + + +int reference_isequall(long double, long double); +int reference_isfinitel(long double); +int reference_isgreaterl(long double, long double); +int reference_isgreaterequall(long double, 
long double); +int reference_isinfl(long double); +int reference_islessl(long double, long double); +int reference_islessequall(long double, long double); +int reference_islessgreaterl(long double, long double); +int reference_isnanl(long double); +int reference_isnormall(long double); +int reference_isnotequall(long double, long double); +int reference_isorderedl(long double, long double); +int reference_isunorderedl(long double, long double); +int reference_signbitl(long double); + +long double reference_acoshl(long double x); +long double reference_asinhl(long double x); +long double reference_atanhl(long double x); long double reference_cbrtl(long double x); -long double reference_copysignl( long double x, long double y); -long double reference_exp10l( long double ); -long double reference_exp2l( long double x ); -long double reference_expm1l( long double x ); -long double reference_fmaxl( long double x, long double y ); -long double reference_fminl( long double x, long double y ); -long double reference_hypotl( long double x, long double y ); -long double reference_lgammal( long double x); -int reference_ilogbl( long double ); -long double reference_log2l( long double x ); -long double reference_log1pl( long double x ); -long double reference_logbl( long double x ); -long double reference_maxmagl( long double x, long double y ); -long double reference_minmagl( long double x, long double y ); -long double reference_nanl( cl_ulong x ); -long double reference_reciprocall( long double x ); -long double reference_remainderl( long double x, long double y ); -long double reference_rintl( long double x ); -long double reference_roundl( long double x ); -long double reference_truncl( long double x ); -long double reference_floorl( long double x ); -long double reference_fmodl( long double x, long double y ); -long double reference_frexpl( long double x, int *n ); -long double reference_ldexpl( long double x, int n ); - -long double reference_assignmentl( long double x 
); -int reference_notl( long double x ); +long double reference_copysignl(long double x, long double y); +long double reference_exp10l(long double); +long double reference_exp2l(long double x); +long double reference_expm1l(long double x); +long double reference_fmaxl(long double x, long double y); +long double reference_fminl(long double x, long double y); +long double reference_hypotl(long double x, long double y); +long double reference_lgammal(long double x); +int reference_ilogbl(long double); +long double reference_log2l(long double x); +long double reference_log1pl(long double x); +long double reference_logbl(long double x); +long double reference_maxmagl(long double x, long double y); +long double reference_minmagl(long double x, long double y); +long double reference_nanl(cl_ulong x); +long double reference_reciprocall(long double x); +long double reference_remainderl(long double x, long double y); +long double reference_rintl(long double x); +long double reference_roundl(long double x); +long double reference_truncl(long double x); +long double reference_floorl(long double x); +long double reference_fmodl(long double x, long double y); +long double reference_frexpl(long double x, int* n); +long double reference_ldexpl(long double x, int n); + +long double reference_assignmentl(long double x); +int reference_notl(long double x); #endif - - diff --git a/test_conformance/math_brute_force/sleep.cpp b/test_conformance/math_brute_force/sleep.cpp new file mode 100644 index 0000000000..c7b1243d4f --- /dev/null +++ b/test_conformance/math_brute_force/sleep.cpp @@ -0,0 +1,110 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "sleep.h"
+#include "utility.h"
+
+#if defined(__APPLE__)
+#include <IOKit/pwr_mgt/IOPMLib.h>
+#include <IOKit/IOMessage.h>
+
+// Registration state shared by PreventSleep(), ResumeSleep() and
+// sleepCallback(). The object has static storage duration, so it starts out
+// zeroed; a zero connection therefore means "no registration is active".
+struct
+{
+    io_connect_t connection; // result of IORegisterForSystemPower()
+    IONotificationPortRef port; // filled in by IORegisterForSystemPower()
+    io_object_t iterator; // notifier passed to IODeregisterForSystemPower()
+} sleepInfo;
+
+void sleepCallback(void* refcon, io_service_t service, natural_t messageType,
+                   void* messageArgument);
+
+// Power-management notification handler registered by PreventSleep().
+//
+//   refcon          -- unused (PreventSleep() passes &sleepInfo).
+//   service         -- the IOService whose state has changed (unused).
+//   messageType     -- a messageType enum, defined by IOKit/IOMessage.h or
+//                      by the IOService's family.
+//   messageArgument -- an argument for the message, dependent on the
+//                      messageType; forwarded to the allow/cancel calls.
+void sleepCallback(void* refcon UNUSED, io_service_t service UNUSED,
+                   natural_t messageType, void* messageArgument)
+{
+    switch (messageType)
+    {
+        case kIOMessageSystemWillSleep:
+            // Demand sleep (such as sleep caused by running out of
+            // batteries, closing the lid of a laptop, or selecting sleep
+            // from the Apple menu). This cannot be vetoed, so acknowledge
+            // it.
+            IOAllowPowerChange(sleepInfo.connection, (long)messageArgument);
+            vlog("Hard sleep occurred.\n");
+            break;
+
+        case kIOMessageCanSystemSleep: {
+            // The computer has been idle for several minutes and will sleep
+            // soon, so this notification must be either allowed or
+            // cancelled. Important: without a response there is a 30-second
+            // timeout before the computer sleeps. We cancel, which is what
+            // keeps the machine awake.
+            IOReturn result = IOCancelPowerChange(sleepInfo.connection,
+                                                  (long)messageArgument);
+            if (kIOReturnSuccess != result)
+                vlog("sleep prevention failed. (%d)\n", result);
+            break;
+        }
+
+        case kIOMessageSystemHasPoweredOn:
+            // Wakeup: nothing to do.
+            break;
+
+        default:
+            // Other power messages are deliberately ignored.
+            break;
+    }
+}
+#endif
+
+
+// Registers for system power notifications so that idle sleep requests are
+// cancelled (see sleepCallback). On platforms other than Apple this only
+// logs that the feature is not implemented.
+void PreventSleep(void)
+{
+#if defined(__APPLE__)
+    vlog("Disabling sleep... ");
+    sleepInfo.iterator = (io_object_t)0;
+    sleepInfo.port = NULL;
+    sleepInfo.connection = IORegisterForSystemPower(
+        &sleepInfo, // void * refcon,
+        &sleepInfo.port, // IONotificationPortRef * thePortRef,
+        sleepCallback, // IOServiceInterestCallback callback,
+        &sleepInfo.iterator // io_object_t * notifier
+    );
+
+    if ((io_connect_t)0 == sleepInfo.connection)
+    {
+        // Registration failed, so sleepInfo.port was never set up. Stop
+        // here instead of handing a NULL port / run loop source to
+        // IOKit and CoreFoundation.
+        vlog("failed.\n");
+        return;
+    }
+    vlog("done.\n");
+
+    // Notifications are delivered through this run loop source.
+    CFRunLoopAddSource(CFRunLoopGetCurrent(),
+                       IONotificationPortGetRunLoopSource(sleepInfo.port),
+                       kCFRunLoopDefaultMode);
+#else
+    vlog("*** PreventSleep() is not implemented on this platform.\n");
+#endif
+}
+
+// Undoes PreventSleep(): deregisters the power notification and releases
+// the IOKit resources obtained during registration. Safe to call when
+// PreventSleep() was never called or did not succeed.
+void ResumeSleep(void)
+{
+#if defined(__APPLE__)
+    if ((io_connect_t)0 == sleepInfo.connection)
+    {
+        // No active registration, so there is nothing to deregister.
+        vlog("Sleep was not disabled; nothing to restore.\n");
+        return;
+    }
+
+    IOReturn result = IODeregisterForSystemPower(&sleepInfo.iterator);
+    if (0 != result)
+        vlog("Got error %d restoring sleep \n", result);
+    else
+        vlog("Sleep restored.\n");
+
+    // Release what IORegisterForSystemPower() handed out. Per the IOKit
+    // documentation, destroying the notification port also destroys the
+    // run loop source obtained from it.
+    IOServiceClose(sleepInfo.connection);
+    IONotificationPortDestroy(sleepInfo.port);
+
+    // Back to the "not registered" state so a repeated call is harmless.
+    sleepInfo.connection = (io_connect_t)0;
+    sleepInfo.port = NULL;
+    sleepInfo.iterator = (io_object_t)0;
+#else
+    vlog("*** ResumeSleep() is not implemented on this platform.\n");
+#endif
+}
diff --git a/test_conformance/math_brute_force/Sleep.h b/test_conformance/math_brute_force/sleep.h similarity index 91% rename from test_conformance/math_brute_force/Sleep.h rename to test_conformance/math_brute_force/sleep.h index f983a32fd1..ca643954f4 100644 --- a/test_conformance/math_brute_force/Sleep.h +++ b/test_conformance/math_brute_force/sleep.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at @@ -16,9 +16,7 @@ #ifndef SLEEP_H #define SLEEP_H -void PreventSleep( void ); -void ResumeSleep( void ); +void PreventSleep(void); +void ResumeSleep(void); #endif /* SLEEP_H */ - - diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp deleted file mode 100644 index 2c4b503ef2..0000000000 --- a/test_conformance/math_brute_force/ternary.cpp +++ /dev/null @@ -1,1368 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "Utility.h" - -#include -#include "FunctionList.h" - -#define CORRECTLY_ROUNDED 0 -#define FLUSHED 1 - -int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double_Double_Double_Double(const Func *f, MTdata, - bool relaxedMode); - -extern const vtbl _ternary = { "ternary", TestFunc_Float_Float_Float_Float, - TestFunc_Double_Double_Double_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2 , __global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", name, "( f0, f1, f2 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " float3 f0, f1, f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2, __global double", sizeNames[vectorSize], "* in3 )\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2 , __global double* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " 
double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", name, "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 d0, d1, d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", name, "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - - -// A table of more difficult cases to get right -static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.75f, -1.5f, -1.25f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24), -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, 
-0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.75f, 1.5f, 1.25f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24), +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f -}; - -static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); - - -int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int ftz = f->ftz || gForceFTZ || 0 == 
(CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal = 0.0f; - float maxErrorVal2 = 0.0f; - float maxErrorVal3 = 0.0f; - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - - uint64_t step = bufferSize / sizeof( float ); - int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport; - cl_uchar overflow[BUFFER_SIZE / sizeof( float )]; - float float_ulps; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - if( gIsEmbedded ) - float_ulps = f->float_embedded_ulps; - else - float_ulps = f->float_ulps; - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; - /* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; - */ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; - j = 0; - if( i == 0 ) - { // test edge cases - float *fp = (float *)gIn; - float *fp2 = (float *)gIn2; - float *fp3 = (float *)gIn3; - uint32_t x, y, z; x = y = z = 0; - for( ; j < bufferSize / sizeof( float ); j++ ) - { - fp[j] = specialValuesFloat[x]; - fp2[j] = specialValuesFloat[y]; - fp3[j] = specialValuesFloat[z]; - - if( ++x >= specialValuesFloatCount ) - { - x = 0; - if( ++y >= specialValuesFloatCount ) - { - y = 0; - if( ++z >= specialValuesFloatCount ) - break; - } - } - } - if( j == bufferSize / sizeof( float ) ) - vlog_error( "Test Error: not all special cases tested!\n" ); - } - - for( ; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - p3[j] = genrand_int32(d); - } - if( (error = 
clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = 
clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - float *r = (float *)gOut_Ref; - float *s = (float *)gIn; - float *s2 = (float *)gIn2; - float *s3 = (float *)gIn3; - if( skipNanInf ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); - overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); - } - - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)(gOut[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float err; - int fail; - float test = ((float*) q)[j]; - float correct = f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( skipNanInf ) - { - if( overflow[j] || - IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) || - IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || - IsFloatInfinity(s3[j]) || IsFloatNaN(s3[j]) ) - continue; - } - - - err = Ulp_Error( test, correct ); - fail = ! 
(fabsf(err) <= float_ulps); - - if( fail && ftz ) - { - float correct2, err2; - - // retry per section 6.5.3.2 with flushing on - if( 0.0f == test && 0.0f == f->func.f_fma( s[j], s2[j], s3[j], FLUSHED ) ) - { - fail = 0; - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( fail && IsFloatSubnormal( s[j] ) ) - { // look at me, - float err3, correct3; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( 0.0f, s2[j], s3[j], CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( -0.0f, s2[j], s3[j], CORRECTLY_ROUNDED ); - - if( skipNanInf ) - { - if( fetestexcept( FE_OVERFLOW ) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( 0.0f, s2[j], s3[j], FLUSHED ) || - 0.0f == f->func.f_fma( -0.0f, s2[j], s3[j], FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - - //try with first two args as zero - if( IsFloatSubnormal( s2[j] ) ) - { // its fun to have fun, - double correct4, correct5; - float err4, err5; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( 0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( -0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED ); - correct4 = f->func.f_fma( 0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED ); - correct5 = f->func.f_fma( -0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( !gInfNanSupport ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double 
rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) || - IsFloatInfinity(correct4) || IsFloatNaN(correct4) || - IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && - (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( 0.0f, 0.0f, s3[j], FLUSHED ) || - 0.0f == f->func.f_fma( -0.0f, 0.0f, s3[j], FLUSHED ) || - 0.0f == f->func.f_fma( 0.0f, -0.0f, s3[j], FLUSHED ) || - 0.0f == f->func.f_fma( -0.0f, -0.0f, s3[j], FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - - if( IsFloatSubnormal( s3[j] ) ) - { - if( test == 0.0f ) // 0*0+0 is 0 - { - fail = 0; - err = 0.0f; - } - } - } - else if( IsFloatSubnormal( s3[j] ) ) - { - double correct4, correct5; - float err4, err5; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( 0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( -0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED ); - correct4 = f->func.f_fma( 0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED ); - correct5 = f->func.f_fma( -0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( !gInfNanSupport ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) || - IsFloatInfinity(correct4) || IsFloatNaN(correct4) || - IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && - (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( 0.0f, s2[j], 0.0f, FLUSHED ) || - 0.0f == f->func.f_fma(-0.0f, s2[j], 0.0f, FLUSHED ) || - 0.0f == f->func.f_fma( 0.0f, s2[j],-0.0f, FLUSHED ) || - 0.0f == f->func.f_fma(-0.0f, s2[j],-0.0f, FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - } - } - else if( fail && IsFloatSubnormal( s2[j] ) ) - { - double correct2, correct3; - float err2, err3; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( s[j], 0.0f, s3[j], CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( s[j], -0.0f, s3[j], CORRECTLY_ROUNDED ); - - if( skipNanInf ) - { - if( fetestexcept( FE_OVERFLOW ) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( s[j], 0.0f, s3[j], FLUSHED ) || - 0.0f == f->func.f_fma( s[j], -0.0f, s3[j], FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - - //try with second two args as zero - if( IsFloatSubnormal( s3[j] ) ) - { - double correct4, correct5; - float err4, err5; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( s[j], 0.0f, 0.0f, CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( s[j], -0.0f, 0.0f, CORRECTLY_ROUNDED ); - correct4 = f->func.f_fma( s[j], 0.0f, -0.0f, CORRECTLY_ROUNDED ); - correct5 = f->func.f_fma( s[j], -0.0f, -0.0f, CORRECTLY_ROUNDED ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( !gInfNanSupport ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) || - IsFloatInfinity(correct4) || IsFloatNaN(correct4) || - IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - err4 = Ulp_Error( test, correct4 ); - err5 = Ulp_Error( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && - (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( s[j], 0.0f, 0.0f, FLUSHED ) || - 0.0f == f->func.f_fma( s[j],-0.0f, 0.0f, FLUSHED ) || - 0.0f == f->func.f_fma( s[j], 0.0f,-0.0f, FLUSHED ) || - 0.0f == f->func.f_fma( s[j],-0.0f,-0.0f, FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - } - } - else if( fail && IsFloatSubnormal(s3[j]) ) - { - double correct2, correct3; - float err2, err3; - - if( skipNanInf ) - feclearexcept( FE_OVERFLOW ); - - correct2 = f->func.f_fma( s[j], s2[j], 0.0f, CORRECTLY_ROUNDED ); - correct3 = f->func.f_fma( s[j], s2[j], -0.0f, CORRECTLY_ROUNDED ); - - if( skipNanInf ) - { - if( fetestexcept( FE_OVERFLOW ) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || - IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) - continue; - } - - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( 0.0f == test && - ( 0.0f == f->func.f_fma( s[j], s2[j], 0.0f, FLUSHED ) || - 0.0f == f->func.f_fma( s[j], s2[j],-0.0f, FLUSHED ) ) - ) - { - fail = 0; - err = 0.0f; - } - } - } - - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - maxErrorVal2 = s2[j]; - maxErrorVal3 = s3[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a} ({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((cl_uint*)s)[j], ((cl_uint*)s2)[j], ((cl_uint*)s3)[j], ((float*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = genrand_int32(d); - p2[j] = genrand_int32(d); - p3[j] = genrand_int32(d); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, 
NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, 
MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, 
+0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, -}; - -static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); - - -int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, - bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int ftz = f->ftz || gForceFTZ; - double maxErrorVal = 0.0f; - double maxErrorVal2 = 0.0f; - double maxErrorVal3 = 0.0f; - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( double ); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } - /* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; - */ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - double *p3 = (double *)gIn3; - j = 0; - if( i == 0 ) - { // test edge cases - uint32_t x, y, z; x = y = z = 0; - for( ; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = specialValuesDouble[x]; - p2[j] = specialValuesDouble[y]; - p3[j] = specialValuesDouble[z]; - if( ++x >= specialValuesDoubleCount ) - { - x = 0; - if( ++y >= specialValuesDoubleCount ) - { - y = 0; - if( ++z >= specialValuesDoubleCount ) - break; - } - } - } - if( j == bufferSize / sizeof( 
double ) ) - vlog_error( "Test Error: not all special cases tested!\n" ); - } - - for( ; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - p3[j] = DoubleFromUInt32(genrand_int32(d)); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), 
&gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *r = (double *)gOut_Ref; - double *s = (double *)gIn; - double *s2 = (double *)gIn2; - double *s3 = (double *)gIn3; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint64_t *t = (uint64_t *)gOut_Ref; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = (uint64_t *)(gOut[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - double test = ((double*) q)[j]; - long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= f->double_ulps); - - if( fail && ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleSubnormal(correct) ) - { // look at me, - fail = fail && ( test != 0.0f ); - if( ! 
fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( fail && IsDoubleSubnormal( s[j] ) ) - { // look at me, - long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); - long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { // look at me now, - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - //try with first two args as zero - if( IsDoubleSubnormal( s2[j] ) ) - { // its fun to have fun, - correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] ); - correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); - long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); - long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = 
fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - if( IsDoubleSubnormal( s3[j] ) ) - { // but you have to know how! - correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f ); - correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f ); - correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f ); - correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f ); - long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f ); - long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); - long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); - long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - float err6 = Bruteforce_Ulp_Error_Double( test, correct6 ); - float err7 = Bruteforce_Ulp_Error_Double( test, correct7 ); - float err8 = Bruteforce_Ulp_Error_Double( test, correct8 ); - float err9 = Bruteforce_Ulp_Error_Double( test, correct9 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && - (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && - (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - if( fabsf( err6 ) < fabsf(err ) ) - err = err6; - if( fabsf( err7 ) < fabsf(err ) ) - err = err7; - if( fabsf( err8 ) < fabsf(err ) ) - err = err8; - if( fabsf( err9 ) < fabsf(err ) ) - err = err9; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( 
correct5, f->double_ulps ) || - IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps ) || - IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - else if( IsDoubleSubnormal( s3[j] ) ) - { - correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 ); - correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); - long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); - long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( fail && IsDoubleSubnormal( s2[j] ) ) - { - long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); - long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - - //try with second two args as zero - if( IsDoubleSubnormal( s3[j] ) ) - { - correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 ); - correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); - long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); - long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); - err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); - float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && - (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - if( fabsf( err4 ) < fabsf(err ) ) - err = err4; - if( fabsf( err5 ) < fabsf(err ) ) - err = err5; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || - IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - else if( fail && IsDoubleSubnormal(s3[j]) ) - { - long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); - long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - maxErrorVal2 = s2[j]; - maxErrorVal3 = s3[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %sD%s: %f ulp error at {%.13la, %.13la, %.13la}: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - double *p2 = (double *)gIn2; - double *p3 = (double *)gIn3; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - p[j] = DoubleFromUInt32(genrand_int32(d)); - p2[j] = DoubleFromUInt32(genrand_int32(d)); - p3[j] = DoubleFromUInt32(genrand_int32(d)); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); - return error; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, 
kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - - diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp new file mode 100644 index 0000000000..8af136ac27 --- /dev/null +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -0,0 +1,744 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include <cstring> + +#define CORRECTLY_ROUNDED 0 +#define FLUSHED 1 + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in1, __global double", + sizeNames[vectorSize], + "* in2, __global double", + sizeNames[vectorSize], + "* in3 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i], in3[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* in, __global double* in2, " + "__global double* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " double3 d2 = vload3( 0, in3 + 3 * i );\n" + " d0 = ", + name, + "( d0, d1, d2 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)).
Assume power of two " + "buffer size \n" + " double3 d0;\n" + " double3 d1;\n" + " double3 d2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " d2 = (double3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", + name, + "( d0, d1, d2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +// A table of more difficult cases to get right +const double specialValues[] = { + -NAN, + -INFINITY, + -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), + -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), + -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), + -1.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, + + +NAN, + +INFINITY, + +DBL_MAX, + 
MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), + MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + +3.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), + +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), + +2.0, + MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), + MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + +1.0, + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, + MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), + +0.0, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +} // anonymous namespace + +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, + bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + double maxErrorVal3 = 
0.0f; + uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE); + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + Force64BitFPUPrecision(); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + size_t idx = 0; + + if (i == 0) + { // test edge cases + uint32_t x, y, z; + x = y = z = 0; + for (; idx < BUFFER_SIZE / sizeof(double); idx++) + { + p[idx] = specialValues[x]; + p2[idx] = specialValues[y]; + p3[idx] = specialValues[z]; + if (++x >= specialValuesCount) + { + x = 0; + if (++y >= specialValuesCount) + { + y = 0; + if (++z >= specialValuesCount) break; + } + } + } + if (idx == BUFFER_SIZE / sizeof(double)) + vlog_error("Test Error: not all special cases tested!\n"); + } + + for (; idx < BUFFER_SIZE / sizeof(double); idx++) + { + p[idx] = DoubleFromUInt32(genrand_int32(d)); + p2[idx] = DoubleFromUInt32(genrand_int32(d)); + p3[idx] = DoubleFromUInt32(genrand_int32(d)); + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, + BUFFER_SIZE, gIn2, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, + BUFFER_SIZE, gIn3, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + 
{ + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2), + &gInBuffer2))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3), + &gInBuffer3))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *s = (double *)gIn; + double *s2 = (double *)gIn2; + double *s3 = (double *)gIn3; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify 
data + uint64_t *t = (uint64_t *)gOut_Ref; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint64_t *q = (uint64_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + double test = ((double *)q)[j]; + long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); + int fail = !(fabsf(err) <= f->double_ulps); + + if (fail && ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleSubnormal(correct)) + { // look at me, + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (fail && IsDoubleSubnormal(s[j])) + { // look at me, + long double correct2 = + f->dfunc.f_fff(0.0, s2[j], s3[j]); + long double correct3 = + f->dfunc.f_fff(-0.0, s2[j], s3[j]); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps)) + { // look at me now, + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with first two args as zero + if (IsDoubleSubnormal(s2[j])) + { // its fun to have fun, + correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]); + correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]); + long double correct4 = + f->dfunc.f_fff(0.0, -0.0, s3[j]); + long double correct5 = + f->dfunc.f_fff(-0.0, -0.0, s3[j]); + err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + float err4 = + Bruteforce_Ulp_Error_Double(test, correct4); + float err5 = + Bruteforce_Ulp_Error_Double(test, correct5); + fail = fail + && 
((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps)) + && (!(fabsf(err4) <= f->double_ulps)) + && (!(fabsf(err5) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps) + || IsDoubleResultSubnormal(correct4, + f->double_ulps) + || IsDoubleResultSubnormal(correct5, + f->double_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + if (IsDoubleSubnormal(s3[j])) + { // but you have to know how! + correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f); + correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f); + correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f); + correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f); + long double correct6 = + f->dfunc.f_fff(0.0, 0.0, -0.0f); + long double correct7 = + f->dfunc.f_fff(-0.0, 0.0, -0.0f); + long double correct8 = + f->dfunc.f_fff(0.0, -0.0, -0.0f); + long double correct9 = + f->dfunc.f_fff(-0.0, -0.0, -0.0f); + err2 = Bruteforce_Ulp_Error_Double( + test, correct2); + err3 = Bruteforce_Ulp_Error_Double( + test, correct3); + err4 = Bruteforce_Ulp_Error_Double( + test, correct4); + err5 = Bruteforce_Ulp_Error_Double( + test, correct5); + float err6 = Bruteforce_Ulp_Error_Double( + test, correct6); + float err7 = Bruteforce_Ulp_Error_Double( + test, correct7); + float err8 = Bruteforce_Ulp_Error_Double( + test, correct8); + float err9 = Bruteforce_Ulp_Error_Double( + test, correct9); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) + <= f->double_ulps)) + && (!(fabsf(err4) + <= f->double_ulps)) + && (!(fabsf(err5) + <= f->double_ulps)) + && (!(fabsf(err6) + <= f->double_ulps)) + && (!(fabsf(err7) + <= f->double_ulps)) + && (!(fabsf(err8) + <= f->double_ulps)) + && (!(fabsf(err9) + <= f->double_ulps))); + if
(fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + if (fabsf(err6) < fabsf(err)) err = err6; + if (fabsf(err7) < fabsf(err)) err = err7; + if (fabsf(err8) < fabsf(err)) err = err8; + if (fabsf(err9) < fabsf(err)) err = err9; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal( + correct3, f->double_ulps) + || IsDoubleResultSubnormal( + correct4, f->double_ulps) + || IsDoubleResultSubnormal( + correct5, f->double_ulps) + || IsDoubleResultSubnormal( + correct6, f->double_ulps) + || IsDoubleResultSubnormal( + correct7, f->double_ulps) + || IsDoubleResultSubnormal( + correct8, f->double_ulps) + || IsDoubleResultSubnormal( + correct9, f->double_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (IsDoubleSubnormal(s3[j])) + { + correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0); + correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0); + long double correct4 = + f->dfunc.f_fff(0.0, s2[j], -0.0); + long double correct5 = + f->dfunc.f_fff(-0.0, s2[j], -0.0); + err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + float err4 = + Bruteforce_Ulp_Error_Double(test, correct4); + float err5 = + Bruteforce_Ulp_Error_Double(test, correct5); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps)) + && (!(fabsf(err4) <= f->double_ulps)) + && (!(fabsf(err5) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps) + || IsDoubleResultSubnormal(correct4, + f->double_ulps) + || 
IsDoubleResultSubnormal(correct5, + f->double_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + else if (fail && IsDoubleSubnormal(s2[j])) + { + long double correct2 = + f->dfunc.f_fff(s[j], 0.0, s3[j]); + long double correct3 = + f->dfunc.f_fff(s[j], -0.0, s3[j]); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // try with second two args as zero + if (IsDoubleSubnormal(s3[j])) + { + correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0); + correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0); + long double correct4 = + f->dfunc.f_fff(s[j], 0.0, -0.0); + long double correct5 = + f->dfunc.f_fff(s[j], -0.0, -0.0); + err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + float err4 = + Bruteforce_Ulp_Error_Double(test, correct4); + float err5 = + Bruteforce_Ulp_Error_Double(test, correct5); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps)) + && (!(fabsf(err4) <= f->double_ulps)) + && (!(fabsf(err5) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps) + || IsDoubleResultSubnormal(correct4, + f->double_ulps) + || IsDoubleResultSubnormal(correct5, + f->double_ulps)) + { + fail = fail && (test != 0.0f); 
+ if (!fail) err = 0.0f; + } + } + } + else if (fail && IsDoubleSubnormal(s3[j])) + { + long double correct2 = + f->dfunc.f_fff(s[j], s2[j], 0.0); + long double correct3 = + f->dfunc.f_fff(s[j], s2[j], -0.0); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= f->double_ulps)) + && (!(fabsf(err3) <= f->double_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if (fail) + { + vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, " + "%.13la, %.13la}: *%.13la vs. %.13la\n", + f->name, sizeNames[k], err, s[j], s2[j], + s3[j], ((double *)gOut_Ref)[j], test); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, + maxErrorVal3); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp new file mode 100644 index 0000000000..c69083ada1 --- /dev/null +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -0,0 +1,879 @@ +// +// Copyright (c) 2017 
The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +#define CORRECTLY_ROUNDED 0 +#define FLUSHED 1 + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in1, __global float", + sizeNames[vectorSize], + "* in2, __global float", + sizeNames[vectorSize], + "* in3 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in1[i], in2[i], in3[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* in, __global float* in2, " + "__global float* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " float3 f2 = vload3( 0, in3 + 3 * i );\n" + " f0 = ", + name, + "( f0, f1, f2 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " float3 f1;\n" + " float3 f2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " f2 = (float3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, f1, f2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +// A table of more difficult cases to get right +const float specialValues[] = { + -NAN, + -INFINITY, + -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), + MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), + MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), + MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), + MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), + -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), + -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), + -1.75f, + -1.5f, + -1.25f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), + MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), + MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24), + -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), + -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), + -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), + MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), + MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), + MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), + -0.0f, + + +NAN, + +INFINITY, + +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), + MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), + MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + 
MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), + MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), + MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), + 2.5f, + MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), + +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), + 1.75f, + 1.5f, + 1.25f, + MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), + MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24), + +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), + +1.0f, + MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), + +FLT_MIN, + MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), + MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), + MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), + MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), + +0.0f, +}; + +constexpr size_t specialValuesCount = + sizeof(specialValues) / sizeof(specialValues[0]); + +} // anonymous namespace + +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + float maxErrorVal3 = 0.0f; + uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); + + cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; + + float float_ulps; + if (gIsEmbedded) + float_ulps = f->float_embedded_ulps; + 
else + float_ulps = f->float_ulps; + + int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport; + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; + size_t idx = 0; + + if (i == 0) + { // test edge cases + float *fp = (float *)gIn; + float *fp2 = (float *)gIn2; + float *fp3 = (float *)gIn3; + uint32_t x, y, z; + x = y = z = 0; + for (; idx < BUFFER_SIZE / sizeof(float); idx++) + { + fp[idx] = specialValues[x]; + fp2[idx] = specialValues[y]; + fp3[idx] = specialValues[z]; + + if (++x >= specialValuesCount) + { + x = 0; + if (++y >= specialValuesCount) + { + y = 0; + if (++z >= specialValuesCount) break; + } + } + } + if (idx == BUFFER_SIZE / sizeof(float)) + vlog_error("Test Error: not all special cases tested!\n"); + } + + for (; idx < BUFFER_SIZE / sizeof(float); idx++) + { + p[idx] = genrand_int32(d); + p2[idx] = genrand_int32(d); + p3[idx] = genrand_int32(d); + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, + BUFFER_SIZE, gIn2, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); + return error; + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, + BUFFER_SIZE, gIn3, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + 
uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeof(cl_float) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2), + &gInBuffer2))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3), + &gInBuffer3))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *s = (float *)gIn; + float *s2 = (float *)gIn2; + float *s3 = (float *)gIn3; + if (skipNanInf) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = + (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + r[j] = + (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED); + } + + // Read the data back + for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint32_t *q = (uint32_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float err; + int fail; + float test = ((float *)q)[j]; + float correct = + f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED); + + // Per section 10 paragraph 6, accept any result if an input + // or output is a infinity or NaN or overflow + if (skipNanInf) + { + if (overflow[j] || IsFloatInfinity(correct) + || IsFloatNaN(correct) || IsFloatInfinity(s[j]) + || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j]) + || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j]) + || IsFloatNaN(s3[j])) + continue; + } + + + err = Ulp_Error(test, correct); + fail = !(fabsf(err) <= float_ulps); + + if (fail && ftz) + { + float correct2, err2; + + // retry per section 6.5.3.2 with flushing on + if (0.0f == test + && 0.0f + == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED)) + { + fail = 0; + err = 0.0f; + } + + // retry per section 6.5.3.3 + if (fail && IsFloatSubnormal(s[j])) + { // look at me, + float err3, correct3; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(0.0f, s2[j], s3[j], + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(-0.0f, s2[j], s3[j], + CORRECTLY_ROUNDED); + + if (skipNanInf) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(0.0f, s2[j], s3[j], + FLUSHED) + || 0.0f + == f->func.f_fma(-0.0f, s2[j], s3[j], + FLUSHED))) + { + fail = 0; + err = 0.0f; + } + + // try with first two args as zero + if (IsFloatSubnormal(s2[j])) + { // its fun to have fun, + double correct4, correct5; + float err4, err5; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(0.0f, 0.0f, s3[j], + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j], + CORRECTLY_ROUNDED); + correct4 = f->func.f_fma(0.0f, -0.0f, s3[j], + CORRECTLY_ROUNDED); + correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j], + CORRECTLY_ROUNDED); + + // Per section 10 paragraph 6, accept any result + // if an input or output is a infinity or NaN or + // overflow + if (!gInfNanSupport) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3) + || IsFloatInfinity(correct4) + || IsFloatNaN(correct4) + || IsFloatInfinity(correct5) + || IsFloatNaN(correct5)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + err4 = Ulp_Error(test, correct4); + err5 = Ulp_Error(test, correct5); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps)) + && (!(fabsf(err4) <= float_ulps)) + && (!(fabsf(err5) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(0.0f, 0.0f, s3[j], + FLUSHED) + || 0.0f + == f->func.f_fma(-0.0f, 0.0f, s3[j], + FLUSHED) + || 0.0f + == f->func.f_fma(0.0f, -0.0f, s3[j], + FLUSHED) + || 0.0f + == f->func.f_fma(-0.0f, -0.0f, + s3[j], FLUSHED))) + { + fail = 0; + err = 0.0f; + } + + if (IsFloatSubnormal(s3[j])) + { + if (test == 0.0f) // 0*0+0 is 0 + { + fail = 0; + err = 0.0f; + } + } + } + else if (IsFloatSubnormal(s3[j])) + { + double correct4, correct5; + float err4, err5; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(0.0f, s2[j], 0.0f, + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f, + CORRECTLY_ROUNDED); + correct4 = f->func.f_fma(0.0f, s2[j], -0.0f, + CORRECTLY_ROUNDED); + correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f, + CORRECTLY_ROUNDED); + + // Per section 10 paragraph 6, accept any result + // if an input or output is a infinity or NaN or + // overflow + if (!gInfNanSupport) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3) + || IsFloatInfinity(correct4) + || IsFloatNaN(correct4) + || IsFloatInfinity(correct5) + || IsFloatNaN(correct5)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + err4 = Ulp_Error(test, correct4); + err5 = Ulp_Error(test, correct5); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps)) + && (!(fabsf(err4) <= float_ulps)) + && (!(fabsf(err5) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(0.0f, s2[j], 0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(-0.0f, s2[j], 0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(0.0f, s2[j], -0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(-0.0f, s2[j], + -0.0f, FLUSHED))) + { + fail = 0; + err = 0.0f; + } + } + } + else if (fail && IsFloatSubnormal(s2[j])) + { + double correct2, correct3; + float err2, err3; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(s[j], 0.0f, s3[j], + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(s[j], -0.0f, s3[j], + CORRECTLY_ROUNDED); + + if (skipNanInf) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(s[j], 0.0f, s3[j], + FLUSHED) + || 0.0f + == f->func.f_fma(s[j], -0.0f, s3[j], + FLUSHED))) + { + fail = 0; + err = 0.0f; + } + + // try with second two args as zero + if (IsFloatSubnormal(s3[j])) + { + double correct4, correct5; + float err4, err5; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(s[j], 0.0f, 0.0f, + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(s[j], -0.0f, 0.0f, + CORRECTLY_ROUNDED); + correct4 = f->func.f_fma(s[j], 0.0f, -0.0f, + CORRECTLY_ROUNDED); + correct5 = f->func.f_fma(s[j], -0.0f, -0.0f, + CORRECTLY_ROUNDED); + + // Per section 10 paragraph 6, accept any result + // if an input or output is a infinity or NaN or + // overflow + if (!gInfNanSupport) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3) + || IsFloatInfinity(correct4) + || IsFloatNaN(correct4) + || IsFloatInfinity(correct5) + || IsFloatNaN(correct5)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + err4 = Ulp_Error(test, correct4); + err5 = Ulp_Error(test, correct5); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps)) + && (!(fabsf(err4) <= float_ulps)) + && (!(fabsf(err5) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + if (fabsf(err4) < fabsf(err)) err = err4; + if (fabsf(err5) < fabsf(err)) err = err5; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(s[j], 0.0f, 0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(s[j], -0.0f, 0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(s[j], 0.0f, -0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(s[j], -0.0f, -0.0f, + FLUSHED))) + { + fail = 0; + err = 0.0f; + } + } + } + else if (fail && IsFloatSubnormal(s3[j])) + { + double correct2, correct3; + float err2, err3; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + + correct2 = f->func.f_fma(s[j], s2[j], 0.0f, + CORRECTLY_ROUNDED); + correct3 = f->func.f_fma(s[j], s2[j], -0.0f, + CORRECTLY_ROUNDED); + + if (skipNanInf) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. 
+ if (IsFloatInfinity(correct2) + || IsFloatNaN(correct2) + || IsFloatInfinity(correct3) + || IsFloatNaN(correct3)) + continue; + } + + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + fail = fail + && ((!(fabsf(err2) <= float_ulps)) + && (!(fabsf(err3) <= float_ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (0.0f == test + && (0.0f + == f->func.f_fma(s[j], s2[j], 0.0f, + FLUSHED) + || 0.0f + == f->func.f_fma(s[j], s2[j], -0.0f, + FLUSHED))) + { + fail = 0; + err = 0.0f; + } + } + } + + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if (fail) + { + vlog_error( + "\nERROR: %s%s: %f ulp error at {%a, %a, %a} " + "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n", + f->name, sizeNames[k], err, s[j], s2[j], s3[j], + ((cl_uint *)s)[j], ((cl_uint *)s2)[j], + ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, + maxErrorVal3); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/test_functions.h b/test_conformance/math_brute_force/test_functions.h new file mode 100644 index 0000000000..78aef9c9a6 --- /dev/null +++ b/test_conformance/math_brute_force/test_functions.h @@ -0,0 +1,118 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TEST_FUNCTIONS_H +#define TEST_FUNCTIONS_H + +#include "function_list.h" + +// float foo(float) +int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double) +int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode); + +// int foo(float) +int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode); + +// int foo(double) +int TestFunc_Int_Double(const Func *f, MTdata, bool relaxedMode); + +// float foo(uint) +int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode); + +// double foo(ulong) +int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode); + +// Returns {0, 1} for scalar and {0, -1} for vector. +// int foo(float) +int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode); + +// Returns {0, 1} for scalar and {0, -1} for vector. +// int foo(double) +int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode); + +// float foo(float, float) +int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, double) +int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode); + +// Special handling for nextafter. +// float foo(float, float) +int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata, + bool relaxedMode); + +// Special handling for nextafter. 
+// double foo(double, double) +int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata, + bool relaxedMode); + +// float op float +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata, + bool relaxedMode); + +// double op double +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata, + bool relaxedMode); + +// float foo(float, int) +int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, int) +int TestFunc_Double_Double_Int(const Func *f, MTdata, bool relaxedMode); + +// Returns {0, 1} for scalar and {0, -1} for vector. +// int foo(float, float) +int TestMacro_Int_Float_Float(const Func *f, MTdata, bool relaxedMode); + +// Returns {0, 1} for scalar and {0, -1} for vector. +// int foo(double, double) +int TestMacro_Int_Double_Double(const Func *f, MTdata, bool relaxedMode); + +// float foo(float, float, float) +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, double, double) +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata, + bool relaxedMode); + +// float foo(float, float*) +int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, double*) +int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode); + +// float foo(float, int*) +int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, int*) +int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode); + +// float foo(float, float, int*) +int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode); + +// double foo(double, double, int*) +int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode); + +// Special handling for mad. +// float mad(float, float, float) +int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode); + +// Special handling for mad. 
+// double mad(double, double, double) +int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode); + +#endif diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp deleted file mode 100644 index 0cde4f3019..0000000000 --- a/test_conformance/math_brute_force/unary.cpp +++ /dev/null @@ -1,1249 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include "FunctionList.h" - -#if defined( __APPLE__ ) - #include -#endif - -int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _unary = { "unary", TestFunc_Float_Float, - TestFunc_Double_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - const char 
*c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " f0 = ", name, "( f0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < 
get_global_size(0) )\n" - " {\n" - " double3 f0 = vload3( 0, in + 3 * i );\n" - " f0 = ", name, "( f0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (double3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - -//Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread - float maxError; // max error value. Init to 0. - double maxErrorValue; // position of the max error value. Init to 0. - cl_command_queue tQueue; // per thread command queue to improve performance -}ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes - cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] - ThreadInfo *tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. 
- cl_uint scale; // stride between individual test values - float ulps; // max_allowed ulps - int ftz; // non-zero if running in flush to zero mode - - int isRangeLimited; // 1 if the function is only to be evaluated over a range - float half_sin_cos_tan_limit; - bool relaxedMode; // True if test is to be run in relaxed mode, false - // otherwise. -}TestInfo; - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); - -int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; - int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = (cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) 
- { - vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); - goto exit; - } - - } - - // Check for special cases for unary float - test_info.isRangeLimited = 0; - test_info.half_sin_cos_tan_limit = 0; - if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") ) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = 1.0f + test_info.ulps * (FLT_EPSILON/2.0f); // out of range results from finite inputs must be in [-1,1] - } - else if( 0 == strcmp( f->name, "half_tan")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = INFINITY; // out of range resut from finite inputs must be numeric - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting || skipTestingRelaxed) - { - error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - - if( skipTestingRelaxed ) - { - vlog(" (rlx skip correctness testing)\n"); - goto exit; - } - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) ) - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - ((float*)p)[j] = (float) genrand_real1(d); - else if( strstr( f->name, "log" ) ) - for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) - p[j] = genrand_int32(d) & 0x7fffffff; - else - for( j = 0; j < 
BUFFER_SIZE / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError( test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double current_time = SubtractTime( endTime, startTime ); - sum += current_time; - if( current_time < bestTime ) - bestTime = current_time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); - vlog( "\n" ); - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_float ); - cl_uint scale = job->scale; - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - fptr func = job->f->func; - const char * fname = job->f->name; - bool relaxedMode = job->relaxedMode; - float ulps = getAllowedUlpError(job->f, relaxedMode); - if (relaxedMode) - { - func = job->f->rfunc; - } - - cl_uint j, k; - cl_int error; - - int isRangeLimited = job->isRangeLimited; - float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit; - int ftz = job->ftz; - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_uint *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - // Write the new values to the input array - cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - { - p[j] = base + j * scale; - if (relaxedMode) - { - float p_j = *(float *) &p[j]; - if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 ) //the domain of the function is [-pi,pi] - { - if (fabs(p_j) > M_PI) ((float *)p)[j] = NAN; - } - - if ( strcmp( fname, "reciprocal" ) == 0 ) - { - const float l_limit = HEX_FLT(+, 1, 0, -, 126); - const float u_limit = HEX_FLT(+, 1, 0, +, 126); - - if (fabs(p_j) < l_limit - || fabs(p_j) - > u_limit) // the domain of the function is [2^-126,2^126] - ((float *)p)[j] = NAN; - } - } - } - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - return error; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - return error; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - return error; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! 
err: %d\n", error ); - return error; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - return error; - } - } - - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - float *r = (float *)gOut_Ref + thread_id * buffer_elements; - float *s = (float *)p; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (float) func.f_f( s[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - // Wait for the last buffer - out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - - //Verify data - uint32_t *t = (uint32_t *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = func.f_f( s[j] ); - float err = Ulp_Error( test, correct ); - float abs_error = Abs_Error( test, correct ); - int fail = 0; - int use_abs_error = 0; - - // it is possible for the output to not match the reference result but for Ulp_Error - // to be zero, for example -1.#QNAN vs. 1.#QNAN. In such cases there is no failure - if (err == 0.0f) - { - fail = 0; - } - else if (relaxedMode) - { - if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 ) - { - fail = ! (fabsf(abs_error) <= ulps); - use_abs_error = 1; - } - if (strcmp(fname, "sinpi") == 0 - || strcmp(fname, "cospi") == 0) - { - if (s[j] >= -1.0 && s[j] <= 1.0) - { - fail = !(fabsf(abs_error) <= ulps); - use_abs_error = 1; - } - } - - if ( strcmp(fname, "reciprocal") == 0 ) - { - fail = ! (fabsf(err) <= ulps); - } - - if ( strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0 ) - { - float exp_error = ulps; - - if (!gIsEmbedded) - { - exp_error += floor(fabs(2 * s[j])); - } - - fail = ! (fabsf(err) <= exp_error); - ulps = exp_error; - } - if (strcmp(fname, "tan") == 0) { - - if( !gFastRelaxedDerived ) - { - fail = ! (fabsf(err) <= ulps); - } - // Else fast math derived implementation does not require ULP verification - } - if (strcmp(fname, "exp10") == 0) - { - if( !gFastRelaxedDerived ) - { - fail = ! (fabsf(err) <= ulps); - } - // Else fast math derived implementation does not require ULP verification - } - if (strcmp(fname, "log") == 0 || strcmp(fname, "log2") == 0 - || strcmp(fname, "log10") == 0) - { - if( s[j] >= 0.5 && s[j] <= 2 ) - { - fail = ! (fabsf(abs_error) <= ulps ); - } - else - { - ulps = gIsEmbedded ? 
job->f->float_embedded_ulps : job->f->float_ulps; - fail = ! (fabsf(err) <= ulps); - } - } - - - // fast-relaxed implies finite-only - if( IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) { - fail = 0; - err = 0; - } - } - else - { - fail = ! (fabsf(err) <= ulps); - } - - // half_sin/cos/tan are only valid between +-2**16, Inf, NaN - if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY ) - { - if( fabsf( test ) <= half_sin_cos_tan_limit ) - { - err = 0; - fail = 0; - } - } - - if( fail ) - { - if( ftz ) - { - typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold. - CheckForSubnormal isFloatResultSubnormalPtr; - - if (relaxedMode) - { - isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError; - } - else - { - isFloatResultSubnormalPtr = &IsFloatResultSubnormal; - } - // retry per section 6.5.3.2 - if( (*isFloatResultSubnormalPtr)(correct, ulps) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - double correct2 = func.f_f( 0.0 ); - double correct3 = func.f_f( -0.0 ); - float err2; - float err3; - if( use_abs_error ) - { - err2 = Abs_Error( test, correct2 ); - err3 = Abs_Error( test, correct3 ); - } - else - { - err2 = Ulp_Error( test, correct2 ); - err3 = Ulp_Error( test, correct3 ); - } - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( (*isFloatResultSubnormalPtr)(correct2, ulps ) || (*isFloatResultSubnormalPtr)(correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! 
fail ) - err = 0.0f; - } - } - } - } - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at %a (0x%8.8x): *%a vs. %a\n", job->f->name, sizeNames[k], err, ((float*) s)[j], ((uint32_t*) s)[j], ((float*) t)[j], test); - return -1; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - - return CL_SUCCESS; -} - - - -static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) -{ - const TestInfo *job = (const TestInfo *) data; - size_t buffer_elements = job->subBufferSize; - size_t buffer_size = buffer_elements * sizeof( cl_double ); - cl_uint scale = job->scale; - cl_uint base = job_id * (cl_uint) job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; - dptr func = job->f->dfunc; - cl_uint j, k; - cl_int error; - int ftz = job->ftz; - - Force64BitFPUPrecision(); - - // start the map of the output arrays - cl_event e[ VECTOR_SIZE_COUNT ]; - cl_ulong *out[ VECTOR_SIZE_COUNT ]; - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); - if( error || NULL == out[j]) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - } - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush failed\n" ); - - // Write the new values to the input array - cl_double *p = (cl_double*) gIn + thread_id * buffer_elements; - for( j = 0; j < buffer_elements; j++ ) - p[j] = DoubleFromUInt32( base + j * scale); - - if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); - return error; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - //Wait for the map to finish - if( (error = clWaitForEvents(1, e + j) )) - { - vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); - return error; - } - if( (error = clReleaseEvent( e[j] ) )) - { - vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); - return error; - } - - // Fill the result buffer with garbage, so that old results don't carry over - uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) - { - vlog_error( "Error: clEnqueueMapBuffer failed! 
err: %d\n", error ); - return error; - } - - // run the kernel - size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; - cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel - cl_program program = job->programs[j]; - - if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } - if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } - - if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - return error; - } - } - - - // Get that moving - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 2 failed\n" ); - - if( gSkipCorrectnessTesting ) - return CL_SUCCESS; - - //Calculate the correctly rounded reference result - cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements; - cl_double *s = (cl_double *)p; - for( j = 0; j < buffer_elements; j++ ) - r[j] = (cl_double) func.f_f( s[j] ); - - // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. - for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) - { - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); - return error; - } - } - // Wait for the last buffer - out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); - if( error || NULL == out[j] ) - { - vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); - return error; - } - - - //Verify data - cl_ulong *t = (cl_ulong *)r; - for( j = 0; j < buffer_elements; j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - cl_ulong *q = out[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - cl_double test = ((cl_double*) q)[j]; - long double correct = func.f_f( s[j] ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - int fail = ! (fabsf(err) <= ulps); - - if( fail ) - { - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, ulps) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - long double correct2 = func.f_f( 0.0L ); - long double correct3 = func.f_f( -0.0L ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); - fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); - if( fabsf( err2 ) < fabsf(err ) ) - err = err2; - if( fabsf( err3 ) < fabsf(err ) ) - err = err3; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal(correct2, ulps ) || IsDoubleResultSubnormal(correct3, ulps ) ) - { - fail = fail && ( test != 0.0f); - if( ! fail ) - err = 0.0f; - } - } - } - } - if( fabsf(err ) > tinfo->maxError ) - { - tinfo->maxError = fabsf(err); - tinfo->maxErrorValue = s[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: %f ulp error at %.13la (0x%16.16llx): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, ((cl_double*) gIn)[j], ((cl_ulong*) gIn)[j], ((cl_double*) gOut_Ref)[j], test ); - return -1; - } - } - } - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) - { - vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", j, error ); - return error; - } - } - - if( (error = clFlush(tinfo->tQueue) )) - vlog( "clFlush 3 failed\n" ); - - - if( 0 == ( base & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, buffer_elements, job->scale, job->ulps, job->threadCount); - } else - { - vlog("." ); - } - fflush(stdout); - } - - return CL_SUCCESS; -} - -int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - size_t i, j; - float maxError = 0.0f; - double maxErrorVal = 0.0; -#if defined( __APPLE__ ) - struct timeval time_val; - gettimeofday( &time_val, NULL ); - double start_time = time_val.tv_sec + 1e-6 * time_val.tv_usec; - double end_time; -#endif - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // Init test_info - memset( &test_info, 0, sizeof( test_info ) ); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = 1; - if (gWimpyMode) - { - test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; - } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - //there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for every thread - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - size_t array_size = test_info.threadCount * sizeof( cl_kernel ); - test_info.k[i] = 
(cl_kernel*)malloc( array_size ); - if( NULL == test_info.k[i] ) - { - vlog_error( "Error: Unable to allocate storage for kernels!\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.k[i], 0, array_size ); - } - test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); - if( NULL == test_info.tinfo ) - { - vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); - for( i = 0; i < test_info.threadCount; i++ ) - { - cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; - test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if( error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ - if( error || NULL == test_info.tinfo[i].outBuf[j] ) - { - vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); - goto exit; - } - } - test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error); - if( NULL == test_info.tinfo[i].tQueue || error ) - { - vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - goto exit; - } - - if( !gSkipCorrectnessTesting ) - { - error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); - - // Accumulate the arithmetic errors - for( i = 0; i < test_info.threadCount; i++ ) - { - if( test_info.tinfo[i].maxError > maxError ) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if( error ) - goto exit; - - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - -#if defined( __APPLE__ ) - gettimeofday( &time_val, NULL); - end_time = time_val.tv_sec + 1e-6 * time_val.tv_usec; -#endif - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - - if( strstr( f->name, "exp" ) ) - for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) - p[j] = (double)genrand_real1(d); - else if( strstr( f->name, "log" ) ) - for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) - p[j] = fabs(DoubleFromUInt32( genrand_int32(d))); - else - for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32( genrand_int32(d) ); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } - if( ( error = clSetKernelArg( 
test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( i = 0; i < PERF_LOOP_COUNT; i++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double current_time = SubtractTime( endTime, startTime ); - sum += current_time; - if( current_time < bestTime ) - bestTime = current_time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); - -#if defined( __APPLE__ ) - vlog( "\t(%2.2f seconds)", end_time - start_time ); -#endif - vlog( "\n" ); - -exit: - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - { - clReleaseProgram(test_info.programs[i]); - if( test_info.k[i] ) - { - for( j = 0; j < test_info.threadCount; j++ ) - clReleaseKernel(test_info.k[i][j]); - - free( test_info.k[i] ); - } - } - if( test_info.tinfo ) - { - for( i = 0; i < test_info.threadCount; i++ ) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free( test_info.tinfo ); - } - - return error; -} - - diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp new file mode 100644 index 0000000000..2d45504772 --- /dev/null +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -0,0 +1,531 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " f0 = ", + name, + "( f0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double maxErrorValue; // position of the max error value. Init to 0. 
+ cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + int isRangeLimited; // 1 if the function is only to be evaluated over a + // range + float half_sin_cos_tan_limit; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +}; + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_double); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + float ulps = job->ulps; + dptr func = job->f->dfunc; + cl_int error; + int ftz = job->ftz; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_ulong *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Write the new values to the input array + cl_double *p = (cl_double *)gIn + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + p[j] = DoubleFromUInt32(base + j * scale); + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + return error; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! 
err: %d\n", error); + return error; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + return error; + } + } + + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + cl_double *s = (cl_double *)p; + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_f(s[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Verify data + cl_ulong *t = (cl_ulong *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + cl_double test = ((cl_double *)q)[j]; + long double correct = func.f_f(s[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); + int fail = !(fabsf(err) <= ulps); + + if (fail) + { + if (ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsDoubleSubnormal(s[j])) + { + long double correct2 = func.f_f(0.0L); + long double correct3 = func.f_f(-0.0L); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct2); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct3); + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correct2, ulps) + || IsDoubleResultSubnormal(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + } + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + } + if (fail) + { + vlog_error("\nERROR: %s%s: %f ulp error at %.13la " + "(0x%16.16llx): *%.13la vs. %.13la\n", + job->f->name, sizeNames[k], err, + ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j], + ((cl_double *)gOut_Ref)[j], test); + return -1; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, buffer_elements, job->scale, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + + return CL_SUCCESS; +} + +} // anonymous namespace + +int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == 
test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp new file mode 100644 index 0000000000..83d27b0b9b --- /dev/null +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -0,0 +1,709 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "common.h" +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " f0 = ", + name, + "( f0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i].data(), info->programs + i, + info->relaxedMode); +} + +// Thread specific data for a worker thread +struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double maxErrorValue; // position of the max error value. Init to 0. 
+ cl_command_queue tQueue; // per thread command queue to improve performance +}; + +struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + int isRangeLimited; // 1 if the function is only to be evaluated over a + // range + float half_sin_cos_tan_limit; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +}; + +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +{ + TestInfo *job = (TestInfo *)data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof(cl_float); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint)job->step; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); + fptr func = job->f->func; + const char *fname = job->f->name; + bool relaxedMode = job->relaxedMode; + float ulps = getAllowedUlpError(job->f, relaxedMode); + if (relaxedMode) + { + func = job->f->rfunc; + } + + cl_int error; + + int isRangeLimited = job->isRangeLimited; + float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit; + int ftz = job->ftz; + + // start the map of the output arrays + cl_event e[VECTOR_SIZE_COUNT]; + cl_uint *out[VECTOR_SIZE_COUNT]; + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || 
NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + + // Write the new values to the input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + for (size_t j = 0; j < buffer_elements; j++) + { + p[j] = base + j * scale; + if (relaxedMode) + { + float p_j = *(float *)&p[j]; + if (strcmp(fname, "sin") == 0 + || strcmp(fname, "cos") + == 0) // the domain of the function is [-pi,pi] + { + if (fabs(p_j) > M_PI) ((float *)p)[j] = NAN; + } + + if (strcmp(fname, "reciprocal") == 0) + { + const float l_limit = HEX_FLT(+, 1, 0, -, 126); + const float u_limit = HEX_FLT(+, 1, 0, +, 126); + + if (fabs(p_j) < l_limit + || fabs(p_j) > u_limit) // the domain of the function is + // [2^-126,2^126] + ((float *)p)[j] = NAN; + } + } + } + + if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, + buffer_size, p, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); + return error; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry + // over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueMapBuffer failed! 
err: %d\n", error); + return error; + } + + // run the kernel + size_t vectorCount = + (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its + // own copy of the cl_kernel + cl_program program = job->programs[j]; + + if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]), + &tinfo->outBuf[j]))) + { + LogBuildError(program); + return error; + } + if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), + &tinfo->inBuf))) + { + LogBuildError(program); + return error; + } + + if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, + &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + return error; + } + } + + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); + + if (gSkipCorrectnessTesting) return CL_SUCCESS; + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref + thread_id * buffer_elements; + float *s = (float *)p; + for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]); + + // Read the data back -- no need to wait for the first N-1 buffers but wait + // for the last buffer. This is an in order queue. + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE; + out[j] = (cl_uint *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0, + buffer_size, 0, NULL, NULL, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, + error); + return error; + } + } + + // Verify data + uint32_t *t = (uint32_t *)r; + for (size_t j = 0; j < buffer_elements; j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint32_t *q = out[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float test = ((float *)q)[j]; + double correct = func.f_f(s[j]); + float err = Ulp_Error(test, correct); + float abs_error = Abs_Error(test, correct); + int fail = 0; + int use_abs_error = 0; + + // it is possible for the output to not match the reference + // result but for Ulp_Error to be zero, for example -1.#QNAN + // vs. 1.#QNAN. In such cases there is no failure + if (err == 0.0f) + { + fail = 0; + } + else if (relaxedMode) + { + if (strcmp(fname, "sin") == 0 || strcmp(fname, "cos") == 0) + { + fail = !(fabsf(abs_error) <= ulps); + use_abs_error = 1; + } + if (strcmp(fname, "sinpi") == 0 + || strcmp(fname, "cospi") == 0) + { + if (s[j] >= -1.0 && s[j] <= 1.0) + { + fail = !(fabsf(abs_error) <= ulps); + use_abs_error = 1; + } + } + + if (strcmp(fname, "reciprocal") == 0) + { + fail = !(fabsf(err) <= ulps); + } + + if (strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0) + { + float exp_error = ulps; + + if (!gIsEmbedded) + { + exp_error += floor(fabs(2 * s[j])); + } + + fail = !(fabsf(err) <= exp_error); + ulps = exp_error; + } + if (strcmp(fname, "tan") == 0) + { + + if (!gFastRelaxedDerived) + { + fail = !(fabsf(err) <= ulps); + } + // Else fast math derived implementation does not + // require ULP verification + } + if (strcmp(fname, "exp10") == 0) + { + if (!gFastRelaxedDerived) + { + fail = !(fabsf(err) <= ulps); + } + // Else fast math derived implementation does not + // require ULP verification + } + if (strcmp(fname, "log") == 0 || strcmp(fname, "log2") == 0 + || strcmp(fname, "log10") == 0) + { + if (s[j] >= 0.5 && s[j] <= 2) + { + fail = !(fabsf(abs_error) <= ulps); + } + else + { + ulps = gIsEmbedded ? 
job->f->float_embedded_ulps + : job->f->float_ulps; + fail = !(fabsf(err) <= ulps); + } + } + + + // fast-relaxed implies finite-only + if (IsFloatInfinity(correct) || IsFloatNaN(correct) + || IsFloatInfinity(s[j]) || IsFloatNaN(s[j])) + { + fail = 0; + err = 0; + } + } + else + { + fail = !(fabsf(err) <= ulps); + } + + // half_sin/cos/tan are only valid between +-2**16, Inf, NaN + if (isRangeLimited + && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) + && fabsf(s[j]) < INFINITY) + { + if (fabsf(test) <= half_sin_cos_tan_limit) + { + err = 0; + fail = 0; + } + } + + if (fail) + { + if (ftz) + { + typedef int (*CheckForSubnormal)( + double, float); // If we are in fast relaxed math, + // we have a different calculation + // for the subnormal threshold. + CheckForSubnormal isFloatResultSubnormalPtr; + + if (relaxedMode) + { + isFloatResultSubnormalPtr = + &IsFloatResultSubnormalAbsError; + } + else + { + isFloatResultSubnormalPtr = &IsFloatResultSubnormal; + } + // retry per section 6.5.3.2 + if ((*isFloatResultSubnormalPtr)(correct, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + double correct2 = func.f_f(0.0); + double correct3 = func.f_f(-0.0); + float err2; + float err3; + if (use_abs_error) + { + err2 = Abs_Error(test, correct2); + err3 = Abs_Error(test, correct3); + } + else + { + err2 = Ulp_Error(test, correct2); + err3 = Ulp_Error(test, correct3); + } + fail = fail + && ((!(fabsf(err2) <= ulps)) + && (!(fabsf(err3) <= ulps))); + if (fabsf(err2) < fabsf(err)) err = err2; + if (fabsf(err3) < fabsf(err)) err = err3; + + // retry per section 6.5.3.4 + if ((*isFloatResultSubnormalPtr)(correct2, ulps) + || (*isFloatResultSubnormalPtr)(correct3, ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + } + if (fabsf(err) > tinfo->maxError) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + } + if (fail) + { + 
vlog_error("\nERROR: %s%s: %f ulp error at %a (0x%8.8x): " + "*%a vs. %a\n", + job->f->name, sizeNames[k], err, ((float *)s)[j], + ((uint32_t *)s)[j], ((float *)t)[j], test); + return -1; + } + } + } + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], + out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", + j, error); + return error; + } + } + + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); + + + if (0 == (base & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f " + "ThreadCount:%2u\n", + base, job->step, job->scale, buffer_elements, job->ulps, + job->threadCount); + } + else + { + vlog("."); + } + fflush(stdout); + } + + return CL_SUCCESS; +} + +} // anonymous namespace + +int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, &region, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + &region, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + } + + // Check for special cases for unary float + test_info.isRangeLimited = 0; + test_info.half_sin_cos_tan_limit = 0; + if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = 1.0f + + test_info.ulps + * (FLT_EPSILON / 2.0f); // out of range results from finite + // inputs must be in [-1,1] + } + else if (0 == strcmp(f->name, "half_tan")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = + INFINITY; // out of range result from finite inputs must be numeric + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting || skipTestingRelaxed) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + if (skipTestingRelaxed) + { + vlog(" (rlx skip correctness testing)\n"); + goto exit; + } + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); +
clReleaseCommandQueue(threadInfo.tQueue); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp deleted file mode 100644 index a86277f1d9..0000000000 --- a/test_conformance/math_brute_force/unary_two_results.cpp +++ /dev/null @@ -1,990 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _unary_two_results = { "unary_two_results", - TestFunc_Float2_Float, - TestFunc_Double2_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i], out2 + i );\n" - "}\n" - }; - - const char *c3[] = { "__kernel void math_kernel", 
sizeNames[vectorSize], "( __global float* out, __global float* out2, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 iout = NAN;\n" - " f0 = ", name, "( f0, &iout );\n" - " vstore3( f0, 0, out + 3*i );\n" - " vstore3( iout, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " float3 iout = NAN;\n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, &iout );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " out2[3*i+1] = iout.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " out2[3*i] = iout.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i], out2 + i );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - 
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* out2, __global double* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 f0 = vload3( 0, in + 3 * i );\n" - " double3 iout = NAN;\n" - " f0 = ", name, "( f0, &iout );\n" - " vstore3( f0, 0, out + 3*i );\n" - " vstore3( iout, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " double3 iout = NAN;\n" - " double3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (double3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, &iout );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " out2[3*i+1] = iout.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " out2[3*i] = iout.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - uint32_t l; - int error; - char const * testing_mode; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError0 = 0.0f; - float maxError1 = 0.0f; - int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal0 = 0.0f; - float maxErrorVal1 = 0.0f; - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( float ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); - cl_uchar overflow[BUFFER_SIZE / sizeof( float )]; - int isFract = 0 == strcmp( "fract", f->nameInCode ); - int skipNanInf = isFract && ! 
gInfNanSupport; - float float_ulps = getAllowedUlpError(f, relaxedMode); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = (uint32_t) i + j * scale; - if (relaxedMode && strcmp(f->name, "sincos") == 0) - { - float pj = *(float *)&p[j]; - if (fabs(pj) > M_PI) ((float *)p)[j] = NAN; - } - } - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - p[j] = (uint32_t) i + j; - if (relaxedMode && strcmp(f->name, "sincos") == 0) - { - float pj = *(float *)&p[j]; - if (fabs(pj) > M_PI) ((float *)p)[j] = NAN; - } - } - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, 
NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - FPU_mode_type oldMode; - RoundingMode oldRoundMode = kRoundToNearestEven; - if( isFract ) - { - //Calculate the correctly rounded reference result - memset( &oldMode, 0, sizeof( oldMode ) ); - if( ftz ) - ForceFTZ( &oldMode ); - - // Set the rounding mode to match the device - if (gIsInRTZMode) - oldRoundMode = set_round(kRoundTowardZero, kfloat); - } - - //Calculate the correctly rounded reference result - float *r = (float *)gOut_Ref; - float *r2 = (float *)gOut_Ref2; - float *s = (float *)gIn; - - if( skipNanInf ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - double dd; - feclearexcept(FE_OVERFLOW); - - if (relaxedMode) - r[j] = (float) f->rfunc.f_fpf( s[j], &dd ); - else - r[j] = (float) f->func.f_fpf( s[j], &dd ); - - r2[j] = (float) dd; - overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - double dd; - if (relaxedMode) - r[j] = (float)f->rfunc.f_fpf(s[j], &dd); - 
else - r[j] = (float) f->func.f_fpf( s[j], &dd ); - - r2[j] = (float) dd; - } - } - - if( isFract && ftz ) - RestoreFPState( &oldMode ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - { - if (isFract && gIsInRTZMode) - (void)set_round(oldRoundMode, kfloat); - break; - } - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - uint32_t *t2 = (uint32_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)gOut[k]; - uint32_t *q2 = (uint32_t *)gOut2[k]; - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] || t2[j] != q2[j] ) - { - double correct, correct2; - float err, err2; - float test = ((float*) q)[j]; - float test2 = ((float*) q2)[j]; - - if (relaxedMode) - correct = f->rfunc.f_fpf(s[j], &correct2); - else - correct = f->func.f_fpf( s[j], &correct2 ); - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if (relaxedMode || skipNanInf) - { - if (skipNanInf && overflow[j]) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. - if( IsFloatInfinity(correct) || IsFloatNaN(correct) || - IsFloatInfinity(correct2)|| IsFloatNaN(correct2) || - IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) - continue; - } - - typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold. 
- CheckForSubnormal isFloatResultSubnormalPtr; - if (relaxedMode) - { - err = Abs_Error( test, correct); - err2 = Abs_Error( test2, correct2); - isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError; - } - else - { - err = Ulp_Error( test, correct ); - err2 = Ulp_Error( test2, correct2 ); - isFloatResultSubnormalPtr = &IsFloatResultSubnormal; - } - int fail = ! (fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps); - - if( ftz ) - { - // retry per section 6.5.3.2 - if( (*isFloatResultSubnormalPtr)(correct, float_ulps) ) - { - if( (*isFloatResultSubnormalPtr) (correct2, float_ulps )) - { - fail = fail && ! ( test == 0.0f && test2 == 0.0f ); - if( ! fail ) - { - err = 0.0f; - err2 = 0.0f; - } - } - else - { - fail = fail && ! ( test == 0.0f && fabsf(err2) <= float_ulps); - if( ! fail ) - err = 0.0f; - } - } - else if( (*isFloatResultSubnormalPtr)(correct2, float_ulps ) ) - { - fail = fail && ! ( test2 == 0.0f && fabsf(err) <= float_ulps); - if( ! fail ) - err2 = 0.0f; - } - - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - double correctp, correctn; - double correct2p, correct2n; - float errp, err2p, errn, err2n; - - if( skipNanInf ) - feclearexcept(FE_OVERFLOW); - if (relaxedMode) - { - correctp = f->rfunc.f_fpf( 0.0, &correct2p ); - correctn = f->rfunc.f_fpf( -0.0, &correct2n ); - } - else - { - correctp = f->func.f_fpf( 0.0, &correct2p ); - correctn = f->func.f_fpf( -0.0, &correct2n ); - } - - // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if( skipNanInf ) - { - if( fetestexcept(FE_OVERFLOW) ) - continue; - - // Note: no double rounding here. Reference functions calculate in single precision. 
- if( IsFloatInfinity(correctp) || IsFloatNaN(correctp) || - IsFloatInfinity(correctn) || IsFloatNaN(correctn) || - IsFloatInfinity(correct2p) || IsFloatNaN(correct2p) || - IsFloatInfinity(correct2n) || IsFloatNaN(correct2n) ) - continue; - } - - if (relaxedMode) - { - errp = Abs_Error( test, correctp ); - err2p = Abs_Error( test, correct2p ); - errn = Abs_Error( test, correctn ); - err2n = Abs_Error( test, correct2n ); - } - else - { - errp = Ulp_Error( test, correctp ); - err2p = Ulp_Error( test, correct2p ); - errn = Ulp_Error( test, correctn ); - err2n = Ulp_Error( test, correct2n ); - } - - fail = fail && ((!(fabsf(errp) <= float_ulps)) && (!(fabsf(err2p) <= float_ulps)) && - ((!(fabsf(errn) <= float_ulps)) && (!(fabsf(err2n) <= float_ulps))) ); - if( fabsf( errp ) < fabsf(err ) ) - err = errp; - if( fabsf( errn ) < fabsf(err ) ) - err = errn; - if( fabsf( err2p ) < fabsf(err2 ) ) - err2 = err2p; - if( fabsf( err2n ) < fabsf(err2 ) ) - err2 = err2n; - - // retry per section 6.5.3.4 - if( (*isFloatResultSubnormalPtr)( correctp, float_ulps ) || (*isFloatResultSubnormalPtr)( correctn, float_ulps ) ) - { - if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) ) - { - fail = fail && !( test == 0.0f && test2 == 0.0f); - if( ! fail ) - err = err2 = 0.0f; - } - else - { - fail = fail && ! (test == 0.0f && fabsf(err2) <= float_ulps); - if( ! fail ) - err = 0.0f; - } - } - else if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) ) - { - fail = fail && ! (test2 == 0.0f && (fabsf(err) <= float_ulps)); - if( ! fail ) - err2 = 0.0f; - } - } - } - if( fabsf(err ) > maxError0 ) - { - maxError0 = fabsf(err); - maxErrorVal0 = s[j]; - } - if( fabsf(err2 ) > maxError1 ) - { - maxError1 = fabsf(err2); - maxErrorVal1 = s[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %s%s: {%f, %f} ulp error at %a: *{%a, %a} vs. 
{%a, %a}\n", f->name, sizeNames[k], err, err2, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((float*) gOut_Ref2)[j], test, test2 ); - error = -1; - goto exit; - } - } - } - } - - if (isFract && gIsInRTZMode) - (void)set_round(oldRoundMode, kfloat); - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("."); - } - fflush(stdout); - } - } - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime 
= GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError0 = 0.0f; - float maxError1 = 0.0f; - int ftz = f->ftz || gForceFTZ; - double maxErrorVal0 = 0.0f; - double maxErrorVal1 = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( cl_double ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32((uint32_t) i + j * scale); - } - else - { - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - p[j] = DoubleFromUInt32((uint32_t) i + j); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - 
} - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *r = (double *)gOut_Ref; - double *r2 = (double *)gOut_Ref2; - double *s = (double *)gIn; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - { - long double dd; - r[j] = (double) f->dfunc.f_fpf( s[j], &dd ); - r2[j] = (double) dd; - } - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint64_t *t = (uint64_t *)gOut_Ref; - uint64_t *t2 = (uint64_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = (uint64_t *)(gOut[k]); - uint64_t *q2 = (uint64_t *)(gOut2[k]); - - 
// If we aren't getting the correctly rounded result - if( t[j] != q[j] || t2[j] != q2[j] ) - { - double test = ((double*) q)[j]; - double test2 = ((double*) q2)[j]; - long double correct2; - long double correct = f->dfunc.f_fpf( s[j], &correct2 ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 ); - int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) - { - if( IsDoubleResultSubnormal( correct2, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && test2 == 0.0f ); - if( ! fail ) - { - err = 0.0f; - err2 = 0.0f; - } - } - else - { - fail = fail && ! ( test == 0.0f && fabsf(err2) <= f->double_ulps); - if( ! fail ) - err = 0.0f; - } - } - else if( IsDoubleResultSubnormal( correct2, f->double_ulps ) ) - { - fail = fail && ! ( test2 == 0.0f && fabsf(err) <= f->double_ulps); - if( ! fail ) - err2 = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - long double correct2p, correct2n; - long double correctp = f->dfunc.f_fpf( 0.0, &correct2p ); - long double correctn = f->dfunc.f_fpf( -0.0, &correct2n ); - float errp = Bruteforce_Ulp_Error_Double( test, correctp ); - float err2p = Bruteforce_Ulp_Error_Double( test, correct2p ); - float errn = Bruteforce_Ulp_Error_Double( test, correctn ); - float err2n = Bruteforce_Ulp_Error_Double( test, correct2n ); - fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) && - ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) ); - if( fabsf( errp ) < fabsf(err ) ) - err = errp; - if( fabsf( errn ) < fabsf(err ) ) - err = errn; - if( fabsf( err2p ) < fabsf(err2 ) ) - err2 = err2p; - if( fabsf( err2n ) < fabsf(err2 ) ) - err2 = err2n; - - // retry per section 6.5.3.4 - if( IsDoubleResultSubnormal( correctp, f->double_ulps ) || 
IsDoubleResultSubnormal( correctn, f->double_ulps ) ) - { - if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) ) - { - fail = fail && !( test == 0.0f && test2 == 0.0f); - if( ! fail ) - err = err2 = 0.0f; - } - else - { - fail = fail && ! (test == 0.0f && fabsf(err2) <= f->double_ulps); - if( ! fail ) - err = 0.0f; - } - } - else if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) ) - { - fail = fail && ! (test2 == 0.0f && (fabsf(err) <= f->double_ulps)); - if( ! fail ) - err2 = 0.0f; - } - } - } - if( fabsf(err ) > maxError0 ) - { - maxError0 = fabsf(err); - maxErrorVal0 = s[j]; - } - if( fabsf(err2 ) > maxError1 ) - { - maxError1 = fabsf(err2); - maxErrorVal1 = s[j]; - } - if( fail ) - { - vlog_error( "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: *{%.13la, %.13la} vs. {%.13la, %.13la}\n", f->name, sizeNames[k], err, err2, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((double*) gOut_Ref2)[j], test, test2 ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double*) gIn; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32(genrand_int32(d) ); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; 
j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - - - diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp new file mode 100644 index 0000000000..8757fbc4ee --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -0,0 +1,450 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global double", + sizeNames[vectorSize], + "* out2, __global double", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i], out2 + i );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global double* out2, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " double3 iout = NAN;\n" + " f0 = ", + name, + "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " double3 iout = NAN;\n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +} // anonymous namespace + +int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError0 = 0.0f; + float maxError1 = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal0 = 0.0f; + double maxErrorVal1 = 0.0f; + uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); + int scale = + (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1); + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + Force64BitFPUPrecision(); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + double *p = (double *)gIn; + if (gWimpyMode) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + p[j] = DoubleFromUInt32((uint32_t)i + j * scale); + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + p[j] = DoubleFromUInt32((uint32_t)i + j); + } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + 
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, + 0, BUFFER_SIZE, gOut2[j], 0, NULL, + NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]), + &gOutBuffer2[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *r2 = (double *)gOut_Ref2; + double *s = (double *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + { + long double dd; + r[j] = (double)f->dfunc.f_fpf(s[j], &dd); + r2[j] = (double)dd; + } + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, + BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + { + vlog_error("ReadArray2 
failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + uint64_t *t2 = (uint64_t *)gOut_Ref2; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint64_t *q = (uint64_t *)(gOut[k]); + uint64_t *q2 = (uint64_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j] || t2[j] != q2[j]) + { + double test = ((double *)q)[j]; + double test2 = ((double *)q2)[j]; + long double correct2; + long double correct = f->dfunc.f_fpf(s[j], &correct2); + float err = Bruteforce_Ulp_Error_Double(test, correct); + float err2 = Bruteforce_Ulp_Error_Double(test2, correct2); + int fail = !(fabsf(err) <= f->double_ulps + && fabsf(err2) <= f->double_ulps); + if (ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, f->double_ulps)) + { + if (IsDoubleResultSubnormal(correct2, + f->double_ulps)) + { + fail = fail && !(test == 0.0f && test2 == 0.0f); + if (!fail) + { + err = 0.0f; + err2 = 0.0f; + } + } + else + { + fail = fail + && !(test == 0.0f + && fabsf(err2) <= f->double_ulps); + if (!fail) err = 0.0f; + } + } + else if (IsDoubleResultSubnormal(correct2, + f->double_ulps)) + { + fail = fail + && !(test2 == 0.0f + && fabsf(err) <= f->double_ulps); + if (!fail) err2 = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsDoubleSubnormal(s[j])) + { + long double correct2p, correct2n; + long double correctp = + f->dfunc.f_fpf(0.0, &correct2p); + long double correctn = + f->dfunc.f_fpf(-0.0, &correct2n); + float errp = + Bruteforce_Ulp_Error_Double(test, correctp); + float err2p = + Bruteforce_Ulp_Error_Double(test, correct2p); + float errn = + Bruteforce_Ulp_Error_Double(test, correctn); + float err2n = + Bruteforce_Ulp_Error_Double(test, correct2n); + fail = fail + && ((!(fabsf(errp) <= f->double_ulps)) + && (!(fabsf(err2p) <= f->double_ulps)) + && ((!(fabsf(errn) <= 
f->double_ulps)) + && (!(fabsf(err2n) + <= f->double_ulps)))); + if (fabsf(errp) < fabsf(err)) err = errp; + if (fabsf(errn) < fabsf(err)) err = errn; + if (fabsf(err2p) < fabsf(err2)) err2 = err2p; + if (fabsf(err2n) < fabsf(err2)) err2 = err2n; + + // retry per section 6.5.3.4 + if (IsDoubleResultSubnormal(correctp, + f->double_ulps) + || IsDoubleResultSubnormal(correctn, + f->double_ulps)) + { + if (IsDoubleResultSubnormal(correct2p, + f->double_ulps) + || IsDoubleResultSubnormal(correct2n, + f->double_ulps)) + { + fail = fail + && !(test == 0.0f && test2 == 0.0f); + if (!fail) err = err2 = 0.0f; + } + else + { + fail = fail + && !(test == 0.0f + && fabsf(err2) <= f->double_ulps); + if (!fail) err = 0.0f; + } + } + else if (IsDoubleResultSubnormal(correct2p, + f->double_ulps) + || IsDoubleResultSubnormal(correct2n, + f->double_ulps)) + { + fail = fail + && !(test2 == 0.0f + && (fabsf(err) <= f->double_ulps)); + if (!fail) err2 = 0.0f; + } + } + } + if (fabsf(err) > maxError0) + { + maxError0 = fabsf(err); + maxErrorVal0 = s[j]; + } + if (fabsf(err2) > maxError1) + { + maxError1 = fabsf(err2); + maxErrorVal1 = s[j]; + } + if (fail) + { + vlog_error( + "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: " + "*{%.13la, %.13la} vs. 
{%.13la, %.13la}\n", + f->name, sizeNames[k], err, err2, + ((double *)gIn)[j], ((double *)gOut_Ref)[j], + ((double *)gOut_Ref2)[j], test, test2); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, + maxErrorVal1); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp new file mode 100644 index 0000000000..a54bd024c2 --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -0,0 +1,582 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global float", + sizeNames[vectorSize], + "* out2, __global float", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i], out2 + i );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global float* out2, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 iout = NAN;\n" + " f0 = ", + name, + "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " float3 iout = NAN;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +} // anonymous namespace + +int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError0 = 0.0f; + float maxError1 = 0.0f; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal0 = 0.0f; + float maxErrorVal1 = 0.0f; + uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); + int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1); + cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; + int isFract = 0 == strcmp("fract", f->nameInCode); + int skipNanInf = isFract && !gInfNanSupport; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + float float_ulps = getAllowedUlpError(f, relaxedMode); + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + uint32_t *p = (uint32_t *)gIn; + if (gWimpyMode) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + p[j] = (uint32_t)i + j * scale; + if (relaxedMode && strcmp(f->name, "sincos") == 0) + { + float pj = *(float *)&p[j]; + if (fabs(pj) > M_PI) ((float *)p)[j] = NAN; + } + } + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + p[j] = (uint32_t)i + j; + if (relaxedMode && strcmp(f->name, "sincos") == 0) + { + float pj = *(float *)&p[j]; + if (fabs(pj) > M_PI) ((float *)p)[j] = NAN; + } + } + } + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, 
CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, + 0, BUFFER_SIZE, gOut2[j], 0, NULL, + NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]), + &gOutBuffer2[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + FPU_mode_type oldMode; + RoundingMode oldRoundMode = kRoundToNearestEven; + if (isFract) + { + // Calculate the correctly rounded reference result + memset(&oldMode, 0, sizeof(oldMode)); + if (ftz) ForceFTZ(&oldMode); + + // Set the rounding mode to match the device + 
if (gIsInRTZMode) + oldRoundMode = set_round(kRoundTowardZero, kfloat); + } + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *r2 = (float *)gOut_Ref2; + float *s = (float *)gIn; + + if (skipNanInf) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + double dd; + feclearexcept(FE_OVERFLOW); + + if (relaxedMode) + r[j] = (float)f->rfunc.f_fpf(s[j], &dd); + else + r[j] = (float)f->func.f_fpf(s[j], &dd); + + r2[j] = (float)dd; + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + double dd; + if (relaxedMode) + r[j] = (float)f->rfunc.f_fpf(s[j], &dd); + else + r[j] = (float)f->func.f_fpf(s[j], &dd); + + r2[j] = (float)dd; + } + } + + if (isFract && ftz) RestoreFPState(&oldMode); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, + BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + { + vlog_error("ReadArray2 failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) + { + if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); + break; + } + + // Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + uint32_t *t2 = (uint32_t *)gOut_Ref2; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint32_t *q = (uint32_t *)gOut[k]; + uint32_t *q2 = (uint32_t *)gOut2[k]; + + // If we aren't getting the correctly rounded result + if (t[j] != q[j] || t2[j] != q2[j]) + { + double correct, correct2; + float err, err2; + float test = ((float *)q)[j]; + float test2 = ((float *)q2)[j]; + + if (relaxedMode) + correct = 
f->rfunc.f_fpf(s[j], &correct2); + else + correct = f->func.f_fpf(s[j], &correct2); + + // Per section 10 paragraph 6, accept any result if an input + // or output is a infinity or NaN or overflow + if (relaxedMode || skipNanInf) + { + if (skipNanInf && overflow[j]) continue; + // Note: no double rounding here. Reference functions + // calculate in single precision. + if (IsFloatInfinity(correct) || IsFloatNaN(correct) + || IsFloatInfinity(correct2) || IsFloatNaN(correct2) + || IsFloatInfinity(s[j]) || IsFloatNaN(s[j])) + continue; + } + + typedef int (*CheckForSubnormal)( + double, float); // If we are in fast relaxed math, we + // have a different calculation for the + // subnormal threshold. + CheckForSubnormal isFloatResultSubnormalPtr; + if (relaxedMode) + { + err = Abs_Error(test, correct); + err2 = Abs_Error(test2, correct2); + isFloatResultSubnormalPtr = + &IsFloatResultSubnormalAbsError; + } + else + { + err = Ulp_Error(test, correct); + err2 = Ulp_Error(test2, correct2); + isFloatResultSubnormalPtr = &IsFloatResultSubnormal; + } + int fail = !(fabsf(err) <= float_ulps + && fabsf(err2) <= float_ulps); + + if (ftz) + { + // retry per section 6.5.3.2 + if ((*isFloatResultSubnormalPtr)(correct, float_ulps)) + { + if ((*isFloatResultSubnormalPtr)(correct2, + float_ulps)) + { + fail = fail && !(test == 0.0f && test2 == 0.0f); + if (!fail) + { + err = 0.0f; + err2 = 0.0f; + } + } + else + { + fail = fail + && !(test == 0.0f + && fabsf(err2) <= float_ulps); + if (!fail) err = 0.0f; + } + } + else if ((*isFloatResultSubnormalPtr)(correct2, + float_ulps)) + { + fail = fail + && !(test2 == 0.0f && fabsf(err) <= float_ulps); + if (!fail) err2 = 0.0f; + } + + + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + double correctp, correctn; + double correct2p, correct2n; + float errp, err2p, errn, err2n; + + if (skipNanInf) feclearexcept(FE_OVERFLOW); + if (relaxedMode) + { + correctp = f->rfunc.f_fpf(0.0, &correct2p); + correctn = f->rfunc.f_fpf(-0.0, 
&correct2n); + } + else + { + correctp = f->func.f_fpf(0.0, &correct2p); + correctn = f->func.f_fpf(-0.0, &correct2n); + } + + // Per section 10 paragraph 6, accept any result if + // an input or output is a infinity or NaN or + // overflow + if (skipNanInf) + { + if (fetestexcept(FE_OVERFLOW)) continue; + + // Note: no double rounding here. Reference + // functions calculate in single precision. + if (IsFloatInfinity(correctp) + || IsFloatNaN(correctp) + || IsFloatInfinity(correctn) + || IsFloatNaN(correctn) + || IsFloatInfinity(correct2p) + || IsFloatNaN(correct2p) + || IsFloatInfinity(correct2n) + || IsFloatNaN(correct2n)) + continue; + } + + if (relaxedMode) + { + errp = Abs_Error(test, correctp); + err2p = Abs_Error(test, correct2p); + errn = Abs_Error(test, correctn); + err2n = Abs_Error(test, correct2n); + } + else + { + errp = Ulp_Error(test, correctp); + err2p = Ulp_Error(test, correct2p); + errn = Ulp_Error(test, correctn); + err2n = Ulp_Error(test, correct2n); + } + + fail = fail + && ((!(fabsf(errp) <= float_ulps)) + && (!(fabsf(err2p) <= float_ulps)) + && ((!(fabsf(errn) <= float_ulps)) + && (!(fabsf(err2n) <= float_ulps)))); + if (fabsf(errp) < fabsf(err)) err = errp; + if (fabsf(errn) < fabsf(err)) err = errn; + if (fabsf(err2p) < fabsf(err2)) err2 = err2p; + if (fabsf(err2n) < fabsf(err2)) err2 = err2n; + + // retry per section 6.5.3.4 + if ((*isFloatResultSubnormalPtr)(correctp, + float_ulps) + || (*isFloatResultSubnormalPtr)(correctn, + float_ulps)) + { + if ((*isFloatResultSubnormalPtr)(correct2p, + float_ulps) + || (*isFloatResultSubnormalPtr)(correct2n, + float_ulps)) + { + fail = fail + && !(test == 0.0f && test2 == 0.0f); + if (!fail) err = err2 = 0.0f; + } + else + { + fail = fail + && !(test == 0.0f + && fabsf(err2) <= float_ulps); + if (!fail) err = 0.0f; + } + } + else if ((*isFloatResultSubnormalPtr)(correct2p, + float_ulps) + || (*isFloatResultSubnormalPtr)( + correct2n, float_ulps)) + { + fail = fail + && !(test2 == 0.0f + && 
(fabsf(err) <= float_ulps)); + if (!fail) err2 = 0.0f; + } + } + } + if (fabsf(err) > maxError0) + { + maxError0 = fabsf(err); + maxErrorVal0 = s[j]; + } + if (fabsf(err2) > maxError1) + { + maxError1 = fabsf(err2); + maxErrorVal1 = s[j]; + } + if (fail) + { + vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: " + "*{%a, %a} vs. {%a, %a}\n", + f->name, sizeNames[k], err, err2, + ((float *)gIn)[j], ((float *)gOut_Ref)[j], + ((float *)gOut_Ref2)[j], test, test2); + error = -1; + goto exit; + } + } + } + } + + if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, + maxErrorVal1); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp deleted file mode 100644 index 108be6a4e5..0000000000 --- a/test_conformance/math_brute_force/unary_two_results_i.cpp +++ /dev/null @@ -1,808 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "Utility.h" - -#include -#include -#include "FunctionList.h" - -int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode); -int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _unary_two_results_i = { "unary_two_results_i", - TestFunc_FloatI_Float, - TestFunc_DoubleI_Double }; - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i], out2 + i );\n" - "}\n" - }; - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " int3 iout = INT_MIN;\n" - " f0 = ", name, "( f0, &iout );\n" - " vstore3( f0, 0, out + 3*i );\n" - " vstore3( iout, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " int3 iout = INT_MIN;\n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, &iout );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " out2[3*i+1] = iout.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " out2[3*i] = iout.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i], out2 + i );\n" - "}\n" - }; - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 f0 = vload3( 0, in + 3 * i );\n" - " int3 iout = INT_MIN;\n" - " f0 = ", name, "( f0, &iout );\n" - " vstore3( f0, 0, out + 3*i );\n" - " vstore3( iout, 0, out2 + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after 
BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" - " int3 iout = INT_MIN;\n" - " double3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (double3)( in[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( f0, &iout );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " out2[3*i+1] = iout.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " out2[3*i] = iout.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -cl_ulong abs_cl_long( cl_long i ); -cl_ulong abs_cl_long( cl_long i ) -{ - cl_long mask = i >> 63; - return (i ^ mask) - mask; -} - -int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int64_t maxError2 = 0; - int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal = 0.0f; - float maxErrorVal2 = 0.0f; - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - float float_ulps; - uint64_t step = bufferSize / sizeof( float ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); - cl_ulong maxiError; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - if( gIsEmbedded ) - float_ulps = f->float_embedded_ulps; - else - float_ulps = f->float_ulps; - - maxiError = float_ulps == INFINITY ? 
CL_ULONG_MAX : 0; - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j * scale; - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = 
clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - float *r = (float *)gOut_Ref; - int *r2 = (int *)gOut_Ref2; - float *s = (float *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = (float) f->func.f_fpI( s[j], r2+j ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint32_t *t = (uint32_t *)gOut_Ref; - int32_t *t2 = (int32_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t *)(gOut[k]); - int32_t *q2 = (int32_t *)(gOut2[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] || t2[j] != q2[j] ) - { - float test = ((float*) q)[j]; - int correct2 = INT_MIN; - double correct = f->func.f_fpI( s[j], &correct2 ); - float err = Ulp_Error( test, correct ); - cl_long iErr = (int64_t) q2[j] - (int64_t) correct2; - int fail = ! 
(fabsf(err) <= float_ulps && abs_cl_long( iErr ) <= maxiError ); - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, float_ulps ) ) - { - fail = fail && ! ( test == 0.0f && iErr == 0 ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsFloatSubnormal( s[j] ) ) - { - int correct5, correct6; - double correct3 = f->func.f_fpI( 0.0, &correct5 ); - double correct4 = f->func.f_fpI( -0.0, &correct6 ); - float err2 = Ulp_Error( test, correct3 ); - float err3 = Ulp_Error( test, correct4 ); - cl_long iErr2 = (long long) q2[j] - (long long) correct5; - cl_long iErr3 = (long long) q2[j] - (long long) correct6; - - // Did +0 work? - if( fabsf(err2) <= float_ulps && abs_cl_long( iErr2 ) <= maxiError ) - { - err = err2; - iErr = iErr2; - fail = 0; - } - // Did -0 work? - else if(fabsf(err3) <= float_ulps && abs_cl_long( iErr3 ) <= maxiError) - { - err = err3; - iErr = iErr3; - fail = 0; - } - - // retry per section 6.5.3.4 - if( fail && (IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps )) ) - { - fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) ); - if( ! fail ) - { - err = 0.0f; - iErr = 0; - } - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( llabs(iErr) > maxError2 ) - { - maxError2 = llabs(iErr ); - maxErrorVal2 = s[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %s%s: {%f, %d} ulp error at %a: *{%a, %d} vs. {%a, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! 
gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int64_t maxError2 = 0; - int ftz = f->ftz || gForceFTZ; - double maxErrorVal = 0.0f; - double maxErrorVal2 = 0.0f; - cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0; - size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - - uint64_t step = bufferSize / sizeof( double ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - double *p = (double *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32((uint32_t) i + j * scale); - } - else - { - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32((uint32_t) i + j); - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error 
%d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - - memset_pattern4(gOut2[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *r = (double *)gOut_Ref; - int *r2 = (int *)gOut_Ref2; - double *s = (double *)gIn; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - r[j] = (double) f->dfunc.f_fpI( s[j], r2+j ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = 
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) - { - vlog_error( "ReadArray2 failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - //Verify data - uint64_t *t = (uint64_t *)gOut_Ref; - int32_t *t2 = (int32_t *)gOut_Ref2; - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = (uint64_t *)(gOut[k]); - int32_t *q2 = (int32_t *)(gOut2[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] || t2[j] != q2[j] ) - { - double test = ((double*) q)[j]; - int correct2 = INT_MIN; - long double correct = f->dfunc.f_fpI( s[j], &correct2 ); - float err = Bruteforce_Ulp_Error_Double( test, correct ); - cl_long iErr = (long long) q2[j] - (long long) correct2; - int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError ); - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) - { - fail = fail && ! ( test == 0.0f && iErr == 0 ); - if( ! fail ) - err = 0.0f; - } - - // retry per section 6.5.3.3 - if( IsDoubleSubnormal( s[j] ) ) - { - int correct5, correct6; - long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 ); - long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 ); - float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); - float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); - cl_long iErr2 = (long long) q2[j] - (long long) correct5; - cl_long iErr3 = (long long) q2[j] - (long long) correct6; - - // Did +0 work? - if( fabsf(err2) <= f->double_ulps && abs_cl_long( iErr2 ) <= maxiError ) - { - err = err2; - iErr = iErr2; - fail = 0; - } - // Did -0 work? 
- else if(fabsf(err3) <= f->double_ulps && abs_cl_long( iErr3 ) <= maxiError) - { - err = err3; - iErr = iErr3; - fail = 0; - } - - // retry per section 6.5.3.4 - if( fail && (IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )) ) - { - fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) ); - if( ! fail ) - { - err = 0.0f; - iErr = 0; - } - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( llabs(iErr) > maxError2 ) - { - maxError2 = llabs(iErr ); - maxErrorVal2 = s[j]; - } - - if( fail ) - { - vlog_error( "\nERROR: %sD%s: {%f, %d} ulp error at %.13la: *{%.13la, %d} vs. {%.13la, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! 
gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double *)gIn; - - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = DoubleFromUInt32(genrand_int32(d)); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILED -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", f->name, sizeNames[j] ); - } - for( ; 
j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - - - diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp new file mode 100644 index 0000000000..9ed77dce39 --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -0,0 +1,422 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global int", + sizeNames[vectorSize], + "* out2, __global double", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i], out2 + i );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global int* out2, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " int3 iout = INT_MIN;\n" + " f0 = ", + name, + "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " int3 iout = INT_MIN;\n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +cl_ulong abs_cl_long(cl_long i) +{ + cl_long mask = i >> 63; + return (i ^ mask) - mask; +} + +} // anonymous namespace + +int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + cl_ulong maxiError = f->double_ulps == INFINITY ? 
CL_ULONG_MAX : 0; + uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); + int scale = + (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1); + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + Force64BitFPUPrecision(); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + double *p = (double *)gIn; + if (gWimpyMode) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + p[j] = DoubleFromUInt32((uint32_t)i + j * scale); + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + p[j] = DoubleFromUInt32((uint32_t)i + j); + } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, + 0, BUFFER_SIZE, gOut2[j], 0, NULL, + NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if 
((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]), + &gOutBuffer2[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + double *s = (double *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, + BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + { + vlog_error("ReadArray2 failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint64_t *q = (uint64_t *)(gOut[k]); + int32_t *q2 = (int32_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j] || t2[j] != q2[j]) + { + double test = ((double *)q)[j]; + int correct2 = INT_MIN; + long double correct = f->dfunc.f_fpI(s[j], &correct2); + float err = 
Bruteforce_Ulp_Error_Double(test, correct); + cl_long iErr = (long long)q2[j] - (long long)correct2; + int fail = !(fabsf(err) <= f->double_ulps + && abs_cl_long(iErr) <= maxiError); + if (ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, f->double_ulps)) + { + fail = fail && !(test == 0.0f && iErr == 0); + if (!fail) err = 0.0f; + } + + // retry per section 6.5.3.3 + if (IsDoubleSubnormal(s[j])) + { + int correct5, correct6; + long double correct3 = + f->dfunc.f_fpI(0.0, &correct5); + long double correct4 = + f->dfunc.f_fpI(-0.0, &correct6); + float err2 = + Bruteforce_Ulp_Error_Double(test, correct3); + float err3 = + Bruteforce_Ulp_Error_Double(test, correct4); + cl_long iErr2 = + (long long)q2[j] - (long long)correct5; + cl_long iErr3 = + (long long)q2[j] - (long long)correct6; + + // Did +0 work? + if (fabsf(err2) <= f->double_ulps + && abs_cl_long(iErr2) <= maxiError) + { + err = err2; + iErr = iErr2; + fail = 0; + } + // Did -0 work? + else if (fabsf(err3) <= f->double_ulps + && abs_cl_long(iErr3) <= maxiError) + { + err = err3; + iErr = iErr3; + fail = 0; + } + + // retry per section 6.5.3.4 + if (fail + && (IsDoubleResultSubnormal(correct2, + f->double_ulps) + || IsDoubleResultSubnormal(correct3, + f->double_ulps))) + { + fail = fail + && !(test == 0.0f + && (abs_cl_long(iErr2) <= maxiError + || abs_cl_long(iErr3) + <= maxiError)); + if (!fail) + { + err = 0.0f; + iErr = 0; + } + } + } + } + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if (llabs(iErr) > maxError2) + { + maxError2 = llabs(iErr); + maxErrorVal2 = s[j]; + } + + if (fail) + { + vlog_error("\nERROR: %sD%s: {%f, %d} ulp error at " + "%.13la: *{%.13la, %d} vs. 
{%.13la, %d}\n", + f->name, sizeNames[k], err, (int)iErr, + ((double *)gIn)[j], ((double *)gOut_Ref)[j], + ((int *)gOut_Ref2)[j], test, q2[j]); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, + maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp new file mode 100644 index 0000000000..d048220b1f --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -0,0 +1,420 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global int", + sizeNames[vectorSize], + "* out2, __global float", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i], out2 + i );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global int* out2, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 iout = INT_MIN;\n" + " f0 = ", + name, + "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " int3 iout = INT_MIN;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +cl_ulong abs_cl_long(cl_long i) +{ + cl_long mask = i >> 63; + return (i ^ mask) - mask; +} + +} // anonymous namespace + +int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); + int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1); + cl_ulong maxiError; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + float float_ulps; + if (gIsEmbedded) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + maxiError = float_ulps == INFINITY ? 
CL_ULONG_MAX : 0; + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + uint32_t *p = (uint32_t *)gIn; + if (gWimpyMode) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + p[j] = (uint32_t)i + j * scale; + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + p[j] = (uint32_t)i + j; + } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE); + if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, + 0, BUFFER_SIZE, gOut2[j], 0, NULL, + NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]), + &gOutBuffer2[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if 
((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + float *s = (float *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + r[j] = (float)f->func.f_fpI(s[j], r2 + j); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, + BUFFER_SIZE, gOut2[j], 0, NULL, NULL))) + { + vlog_error("ReadArray2 failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint32_t *q = (uint32_t *)(gOut[k]); + int32_t *q2 = (int32_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j] || t2[j] != q2[j]) + { + float test = ((float *)q)[j]; + int correct2 = INT_MIN; + double correct = f->func.f_fpI(s[j], &correct2); + float err = Ulp_Error(test, correct); + cl_long iErr = (int64_t)q2[j] - (int64_t)correct2; + int fail = !(fabsf(err) <= float_ulps + && abs_cl_long(iErr) <= maxiError); + if (ftz) + { + // retry per section 6.5.3.2 + if (IsFloatResultSubnormal(correct, float_ulps)) + { + fail = fail && !(test == 0.0f && iErr == 0); + if (!fail) err 
= 0.0f; + } + + // retry per section 6.5.3.3 + if (IsFloatSubnormal(s[j])) + { + int correct5, correct6; + double correct3 = f->func.f_fpI(0.0, &correct5); + double correct4 = f->func.f_fpI(-0.0, &correct6); + float err2 = Ulp_Error(test, correct3); + float err3 = Ulp_Error(test, correct4); + cl_long iErr2 = + (long long)q2[j] - (long long)correct5; + cl_long iErr3 = + (long long)q2[j] - (long long)correct6; + + // Did +0 work? + if (fabsf(err2) <= float_ulps + && abs_cl_long(iErr2) <= maxiError) + { + err = err2; + iErr = iErr2; + fail = 0; + } + // Did -0 work? + else if (fabsf(err3) <= float_ulps + && abs_cl_long(iErr3) <= maxiError) + { + err = err3; + iErr = iErr3; + fail = 0; + } + + // retry per section 6.5.3.4 + if (fail + && (IsFloatResultSubnormal(correct2, float_ulps) + || IsFloatResultSubnormal(correct3, + float_ulps))) + { + fail = fail + && !(test == 0.0f + && (abs_cl_long(iErr2) <= maxiError + || abs_cl_long(iErr3) + <= maxiError)); + if (!fail) + { + err = 0.0f; + iErr = 0; + } + } + } + } + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if (llabs(iErr) > maxError2) + { + maxError2 = llabs(iErr); + maxErrorVal2 = s[j]; + } + + if (fail) + { + vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: " + "*{%a, %d} vs. 
{%a, %d}\n", + f->name, sizeNames[k], err, (int)iErr, + ((float *)gIn)[j], ((float *)gOut_Ref)[j], + ((int *)gOut_Ref2)[j], test, q2[j]); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, + maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp deleted file mode 100644 index 87fcae321b..0000000000 --- a/test_conformance/math_brute_force/unary_u.cpp +++ /dev/null @@ -1,700 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "Utility.h" - -#include -#include "FunctionList.h" - -int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode); -int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode); - -extern const vtbl _unary_u = { "unary_u", TestFunc_Float_UInt, - TestFunc_Double_ULong }; - - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode); - -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global uint* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " uint3 u0 = vload3( 0, in + 3 * i );\n" - " float3 f0 = ", name, "( u0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " uint3 u0;\n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " f0 = ", name, "( u0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) -{ - const char *c[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], "* in)\n" - "{\n" - " int i = get_global_id(0);\n" - " out[i] = ", name, "( in[i] );\n" - "}\n" - }; - - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global ulong* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " ulong3 u0 = vload3( 0, in + 3 * i );\n" - " double3 f0 = ", name, "( u0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" - " ulong3 u0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n" - " break;\n" - " case 0:\n" - " u0 = (ulong3)( in[3*i], in[3*i+1], 0xdeaddeaddeaddeadUL ); \n" - " break;\n" - " }\n" - " double3 f0 = ", name, "( u0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c)/sizeof(c[0]); - - if( sizeValues[vectorSize] == 3 ) - { - kern = c3; - kernSize = sizeof(c3)/sizeof(c3[0]); - } - - - char testName[32]; - snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); -} - -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_kernel *kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}BuildKernelInfo; - -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); -static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) -{ - BuildKernelInfo *info = (BuildKernelInfo*) p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); -} - -int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - float maxErrorVal = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - - uint64_t step = bufferSize / sizeof( float ); - int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); - int isRangeLimited = 0; - float float_ulps; - float half_sin_cos_tan_limit = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - if( gIsEmbedded) - float_ulps = f->float_embedded_ulps; - else - float_ulps = f->float_ulps; - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) - return error; -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") ) - { - isRangeLimited = 1; - half_sin_cos_tan_limit = 1.0f + float_ulps * (FLT_EPSILON/2.0f); // out of range results from finite inputs must be in [-1,1] - } - else if( 0 == strcmp( f->name, "half_tan")) - { - isRangeLimited = 1; - half_sin_cos_tan_limit = INFINITY; // out of range resut from finite inputs must be numeric - } - - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - uint32_t *p = (uint32_t *)gIn; - if( gWimpyMode ) - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j * scale; - } - else - { - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = (uint32_t) i + j; - } - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, 
bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILURE -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - float *r = (float*) gOut_Ref; - cl_uint *s = (cl_uint*) gIn; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - r[j] = (float) f->func.f_u( s[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - - //Verify data - uint32_t *t = (uint32_t*) gOut_Ref; - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint32_t *q = (uint32_t*)(gOut[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - float test = ((float*) q)[j]; - double correct = f->func.f_u( s[j] ); - float err = Ulp_Error( test, correct ); - int fail = ! 
(fabsf(err) <= float_ulps); - - // half_sin/cos/tan are only valid between +-2**16, Inf, NaN - if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY ) - { - if( fabsf( test ) <= half_sin_cos_tan_limit ) - { - err = 0; - fail = 0; - } - } - - if( fail ) - { - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsFloatResultSubnormal(correct, float_ulps) ) - { - fail = fail && ( test != 0.0f ); - if( ! fail ) - err = 0.0f; - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( fail ) - { - vlog_error( "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", f->name, sizeNames[k], err, ((uint32_t*) gIn)[j], ((float*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - uint32_t *p = (uint32_t*)gIn; - if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) ) - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - ((float*)p)[j] = (float) genrand_real1(d); - else if( strstr( f->name, "log" ) ) - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d) & 0x7fffffff; - else - for( j = 0; j < bufferSize / sizeof( float ); j++ ) - p[j] = genrand_int32(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / 
vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILURE -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); - } - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - -static cl_ulong random64( MTdata d ) -{ - return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); -} - -int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) -{ - uint64_t i; - uint32_t j, k; - int error; - cl_program programs[ VECTOR_SIZE_COUNT ]; - cl_kernel kernels[ VECTOR_SIZE_COUNT ]; - float maxError = 0.0f; - int ftz = f->ftz || gForceFTZ; - double maxErrorVal = 0.0f; - size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; - uint64_t step = bufferSize / sizeof( cl_double ); - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - if( gWimpyMode ) - { - step = (1ULL<<32) * gWimpyReductionFactor / (512); - } - Force64BitFPUPrecision(); - - // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if( (error = ThreadPool_Do( BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info ) )) - { - return error; - } -/* - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) - if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) - return error; -*/ - - for( i = 0; i < (1ULL<<32); i += step ) - { - //Init input array - cl_ulong *p = (cl_ulong *)gIn; - for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ ) - p[j] = random64(d); - - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - // write garbage into output arrays - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - uint32_t pattern = 0xffffdead; - memset_pattern4(gOut[j], &pattern, bufferSize); - if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL))) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); - goto exit; - } - } - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - if( (error = 
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) - { - vlog_error( "FAILURE -- could not execute kernel\n" ); - goto exit; - } - } - - // Get that moving - if( (error = clFlush(gQueue) )) - vlog( "clFlush failed\n" ); - - //Calculate the correctly rounded reference result - double *r = (double*) gOut_Ref; - cl_ulong *s = (cl_ulong*) gIn; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - r[j] = (double) f->dfunc.f_u( s[j] ); - - // Read the data back - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) - { - vlog_error( "ReadArray failed %d\n", error ); - goto exit; - } - } - - if( gSkipCorrectnessTesting ) - break; - - - //Verify data - uint64_t *t = (uint64_t*) gOut_Ref; - for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) - { - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - uint64_t *q = (uint64_t*)(gOut[k]); - - // If we aren't getting the correctly rounded result - if( t[j] != q[j] ) - { - double test = ((double*) q)[j]; - long double correct = f->dfunc.f_u( s[j] ); - float err = Bruteforce_Ulp_Error_Double(test, correct); - int fail = ! (fabsf(err) <= f->double_ulps); - - // half_sin/cos/tan are only valid between +-2**16, Inf, NaN - if( fail ) - { - if( ftz ) - { - // retry per section 6.5.3.2 - if( IsDoubleResultSubnormal(correct, f->double_ulps) ) - { - fail = fail && ( test != 0.0 ); - if( ! fail ) - err = 0.0f; - } - } - } - if( fabsf(err ) > maxError ) - { - maxError = fabsf(err); - maxErrorVal = s[j]; - } - if( fail ) - { - vlog_error( "\n%s%sD: %f ulp error at 0x%16.16llx: *%.13la vs. 
%.13la\n", f->name, sizeNames[k], err, ((uint64_t*) gIn)[j], ((double*) gOut_Ref)[j], test ); - error = -1; - goto exit; - } - } - } - } - - if( 0 == (i & 0x0fffffff) ) - { - if (gVerboseBruteForce) - { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); - } else - { - vlog("." ); - } - fflush(stdout); - } - } - - if( ! gSkipCorrectnessTesting ) - { - if( gWimpyMode ) - vlog( "Wimp pass" ); - else - vlog( "passed" ); - } - - if( gMeasureTimes ) - { - //Init input array - double *p = (double*) gIn; - - for( j = 0; j < bufferSize / sizeof( double ); j++ ) - p[j] = random64(d); - if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) - { - vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); - return error; - } - - - // Run the kernels - for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) - { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; - if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } - if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } - - double sum = 0.0; - double bestTime = INFINITY; - for( k = 0; k < PERF_LOOP_COUNT; k++ ) - { - uint64_t startTime = GetTime(); - if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) - { - vlog_error( "FAILURE -- could not execute kernel\n" ); - goto exit; - } - - // Make sure OpenCL is done - if( (error = clFinish(gQueue) ) ) - { - vlog_error( "Error %d at clFinish\n", error ); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime( endTime, startTime ); - sum += time; - if( time < bestTime ) - bestTime = time; - } - - if( gReportAverageTimes ) - bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double) 
gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); - vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); - } - for( ; j < gMaxVectorSizeIndex; j++ ) - vlog( "\t -- " ); - } - - if( ! gSkipCorrectnessTesting ) - vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); - vlog( "\n" ); - -exit: - // Release - for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) - { - clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); - } - - return error; -} - diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp new file mode 100644 index 0000000000..9478d0bc2d --- /dev/null +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -0,0 +1,318 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double", + sizeNames[vectorSize], + "* out, __global ulong", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global ulong* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " ulong3 u0 = vload3( 0, in + 3 * i );\n" + " double3 f0 = ", + name, + "( u0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " + "buffer size \n" + " ulong3 u0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, " + "0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " case 0:\n" + " u0 = (ulong3)( in[3*i], in[3*i+1], " + "0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " }\n" + " double3 f0 = ", + name, + "( u0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +cl_ulong random64(MTdata d) +{ + return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32); +} + +} // anonymous namespace + +int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + Force64BitFPUPrecision(); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + cl_ulong *p = (cl_ulong *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++) + p[j] = random64(d); + + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t 
vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + cl_ulong *s = (cl_ulong *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + r[j] = (double)f->dfunc.f_u(s[j]); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint64_t *q = (uint64_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + double test = ((double *)q)[j]; + long double correct = f->dfunc.f_u(s[j]); + float err = Bruteforce_Ulp_Error_Double(test, correct); + int fail = !(fabsf(err) <= f->double_ulps); + + if (fail) + { + if (ftz) + { + // retry per section 6.5.3.2 + if (IsDoubleResultSubnormal(correct, + f->double_ulps)) + { + fail = fail && (test != 0.0); + if (!fail) err = 0.0f; + } + } + } + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if (fail) + { + 
vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: " + "*%.13la vs. %.13la\n", + f->name, sizeNames[k], err, + ((uint64_t *)gIn)[j], + ((double *)gOut_Ref)[j], test); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp new file mode 100644 index 0000000000..848a9bacdf --- /dev/null +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -0,0 +1,320 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "function_list.h" +#include "test_functions.h" +#include "utility.h" + +#include + +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) +{ + const char *c[] = { "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float", + sizeNames[vectorSize], + "* out, __global uint", + sizeNames[vectorSize], + "* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = ", + name, + "( in[i] );\n" + "}\n" }; + + const char *c3[] = { + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global float* out, __global uint* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " uint3 u0 = vload3( 0, in + 3 * i );\n" + " float3 f0 = ", + name, + "( u0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " + "buffer size \n" + " uint3 u0;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " f0 = ", + name, + "( u0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c) / sizeof(c[0]); + + if (sizeValues[vectorSize] == 3) + { + kern = c3; + kernSize = sizeof(c3) / sizeof(c3[0]); + } + + char testName[32]; + snprintf(testName, sizeof(testName) - 1, "math_kernel%s", + sizeNames[vectorSize]); + + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); +} + +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; + 
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +}; + +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); +} + +} // anonymous namespace + +int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) +{ + int error; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); + int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(double)) + 1); + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + float float_ulps; + if (gIsEmbedded) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } + + for (uint64_t i = 0; i < (1ULL << 32); i += step) + { + // Init input array + uint32_t *p = (uint32_t *)gIn; + if (gWimpyMode) + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + p[j] = (uint32_t)i + j * scale; + } + else + { + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + p[j] = (uint32_t)i + j; + } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, + BUFFER_SIZE, gIn, 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); + return error; + } + + // write garbage into output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); + if ((error = + 
clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", + error, j); + goto exit; + } + } + + // Run the kernels + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), + &gOutBuffer[j]))) + { + LogBuildError(programs[j]); + goto exit; + } + if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer), + &gInBuffer))) + { + LogBuildError(programs[j]); + goto exit; + } + + if ((error = + clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, + &localCount, NULL, 0, NULL, NULL))) + { + vlog_error("FAILED -- could not execute kernel\n"); + goto exit; + } + } + + // Get that moving + if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); + + // Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + cl_uint *s = (cl_uint *)gIn; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + r[j] = (float)f->func.f_u(s[j]); + + // Read the data back + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + if ((error = + clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, + BUFFER_SIZE, gOut[j], 0, NULL, NULL))) + { + vlog_error("ReadArray failed %d\n", error); + goto exit; + } + } + + if (gSkipCorrectnessTesting) break; + + // Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) + { + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + uint32_t *q = (uint32_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if (t[j] != q[j]) + { + float test = ((float *)q)[j]; + double correct = f->func.f_u(s[j]); + float err = Ulp_Error(test, correct); + int fail = !(fabsf(err) <= float_ulps); + + if (fail) + { + if (ftz) + { + // retry 
per section 6.5.3.2 + if (IsFloatResultSubnormal(correct, float_ulps)) + { + fail = fail && (test != 0.0f); + if (!fail) err = 0.0f; + } + } + } + if (fabsf(err) > maxError) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if (fail) + { + vlog_error( + "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", + f->name, sizeNames[k], err, ((uint32_t *)gIn)[j], + ((float *)gOut_Ref)[j], test); + error = -1; + goto exit; + } + } + } + } + + if (0 == (i & 0x0fffffff)) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, + BUFFER_SIZE); + } + else + { + vlog("."); + } + fflush(stdout); + } + } + + if (!gSkipCorrectnessTesting) + { + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} diff --git a/test_conformance/math_brute_force/Utility.cpp b/test_conformance/math_brute_force/utility.cpp similarity index 59% rename from test_conformance/math_brute_force/Utility.cpp rename to test_conformance/math_brute_force/utility.cpp index 9ab7c7fa6b..9b0191ab36 100644 --- a/test_conformance/math_brute_force/Utility.cpp +++ b/test_conformance/math_brute_force/utility.cpp @@ -13,13 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "Utility.h" -#include "FunctionList.h" + +#include "utility.h" +#include "function_list.h" #if defined(__PPC__) -// Global varaiable used to hold the FPU control register state. The FPSCR register can not -// be used because not all Power implementations retain or observed the NI (non-IEEE -// mode) bit. +// Global varaiable used to hold the FPU control register state. 
The FPSCR +// register can not be used because not all Power implementations retain or +// observed the NI (non-IEEE mode) bit. __thread fpu_control_t fpu_control = 0; #endif @@ -28,16 +29,16 @@ void MulD(double *rhi, double *rlo, double u, double v) const double c = 134217729.0; // 1+2^27 double up, u1, u2, vp, v1, v2; - up = u*c; + up = u * c; u1 = (u - up) + up; u2 = u - u1; - vp = v*c; + vp = v * c; v1 = (v - vp) + vp; v2 = v - v1; - double rh = u*v; - double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2); + double rh = u * v; + double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2); *rhi = rh; *rlo = rl; @@ -47,11 +48,13 @@ void AddD(double *rhi, double *rlo, double a, double b) { double zhi, zlo; zhi = a + b; - if(fabs(a) > fabs(b)) { + if (fabs(a) > fabs(b)) + { zlo = zhi - a; zlo = b - zlo; } - else { + else + { zlo = zhi - b; zlo = a - zlo; } @@ -66,17 +69,17 @@ void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) double c = 134217729.0; double up, u1, u2, vp, v1, v2; - up = xh*c; + up = xh * c; u1 = (xh - up) + up; u2 = xh - u1; - vp = yh*c; + vp = yh * c; v1 = (yh - vp) + vp; v2 = yh - v1; - mh = xh*yh; - ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2); - ml += xh*yl + xl*yh; + mh = xh * yh; + ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2); + ml += xh * yl + xl * yh; *rhi = mh + ml; *rlo = (mh - (*rhi)) + ml; @@ -86,7 +89,8 @@ void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) { double r, s; r = xh + yh; - s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl); + s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) + : (yh - r + xh + xl + yl); *rhi = r + s; *rlo = (r - (*rhi)) + s; } @@ -100,72 +104,61 @@ void DivideDD(double *chi, double *clo, double a, double b) *clo = rhi / b; } -// These functions comapre two floats/doubles. 
Since some platforms may choose to -// flush denormals to zeros before comparison, comparison like a < b may give wrong -// result in "certain cases" where we do need correct compasion result when operands -// are denormals .... these functions comapre floats/doubles using signed integer/long int -// rep. In other cases, when flushing to zeros is fine, these should not be used. -// Also these doesn't check for nans and assume nans are handled separately as special edge case -// by the caller which calls these functions -// return 0 if both are equal, 1 if x > y and -1 if x < y. - -inline -int compareFloats(float x, float y) +// These functions comapre two floats/doubles. Since some platforms may choose +// to flush denormals to zeros before comparison, comparison like a < b may give +// wrong result in "certain cases" where we do need correct compasion result +// when operands are denormals .... these functions comapre floats/doubles using +// signed integer/long int rep. In other cases, when flushing to zeros is fine, +// these should not be used. Also these doesn't check for nans and assume nans +// are handled separately as special edge case by the caller which calls these +// functions return 0 if both are equal, 1 if x > y and -1 if x < y. + +inline int compareFloats(float x, float y) { int32f_t a, b; a.f = x; b.f = y; - if( a.i & 0x80000000 ) - a.i = 0x80000000 - a.i; - if( b.i & 0x80000000 ) - b.i = 0x80000000 - b.i; + if (a.i & 0x80000000) a.i = 0x80000000 - a.i; + if (b.i & 0x80000000) b.i = 0x80000000 - b.i; - if( a.i == b.i ) - return 0; + if (a.i == b.i) return 0; return a.i < b.i ? 
-1 : 1; } -inline -int compareDoubles(double x, double y) +inline int compareDoubles(double x, double y) { int64d_t a, b; a.d = x; b.d = y; - if( a.l & 0x8000000000000000LL ) - a.l = 0x8000000000000000LL - a.l; - if( b.l & 0x8000000000000000LL ) - b.l = 0x8000000000000000LL - b.l; + if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l; + if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l; - if( a.l == b.l ) - return 0; + if (a.l == b.l) return 0; return a.l < b.l ? -1 : 1; } -void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed) +void logFunctionInfo(const char *fname, unsigned int float_size, + unsigned int isFastRelaxed) { char const *fpSizeStr = NULL; char const *fpFastRelaxedStr = ""; - switch (float_size) { - case sizeof(cl_double): - fpSizeStr = "fp64"; - break; - case sizeof(cl_float): - fpSizeStr = "fp32"; - break; - case sizeof(cl_half): - fpSizeStr = "fp16"; - break; + switch (float_size) + { + case sizeof(cl_double): fpSizeStr = "fp64"; break; + case sizeof(cl_float): fpSizeStr = "fp32"; break; + case sizeof(cl_half): fpSizeStr = "fp16"; break; } - if (isFastRelaxed) { + if (isFastRelaxed) + { fpFastRelaxedStr = "rlx"; } - vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr); + vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr); } float getAllowedUlpError(const Func *f, const bool relaxed) @@ -196,4 +189,4 @@ float getAllowedUlpError(const Func *f, const bool relaxed) } return ulp; -} \ No newline at end of file +} diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h new file mode 100644 index 0000000000..b4a59edb55 --- /dev/null +++ b/test_conformance/math_brute_force/utility.h @@ -0,0 +1,263 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef UTILITY_H +#define UTILITY_H + +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/fpcontrol.h" +#include "harness/testHarness.h" +#include "harness/ThreadPool.h" +#include "harness/conversions.h" + +#define BUFFER_SIZE (1024 * 1024 * 2) +#define EMBEDDED_REDUCTION_FACTOR (64) + +#if defined(__GNUC__) +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif + +struct Func; + +extern int gWimpyReductionFactor; + +#define VECTOR_SIZE_COUNT 6 +extern const char *sizeNames[VECTOR_SIZE_COUNT]; +extern const int sizeValues[VECTOR_SIZE_COUNT]; + +extern cl_device_id gDevice; +extern cl_context gContext; +extern cl_command_queue gQueue; +extern void *gIn; +extern void *gIn2; +extern void *gIn3; +extern void *gOut_Ref; +extern void *gOut_Ref2; +extern void *gOut[VECTOR_SIZE_COUNT]; +extern void *gOut2[VECTOR_SIZE_COUNT]; +extern cl_mem gInBuffer; +extern cl_mem gInBuffer2; +extern cl_mem gInBuffer3; +extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT]; +extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; +extern int gSkipCorrectnessTesting; +extern int gForceFTZ; +extern int gFastRelaxedDerived; +extern int gWimpyMode; +extern int gIsInRTZMode; +extern int gInfNanSupport; +extern int gIsEmbedded; +extern int gVerboseBruteForce; +extern uint32_t gMaxVectorSizeIndex; +extern uint32_t gMinVectorSizeIndex; +extern cl_device_fp_config gFloatCapabilities; + +#define LOWER_IS_BETTER 0 +#define HIGHER_IS_BETTER 1 + +#include "harness/errorHelpers.h" + +#if defined(_MSC_VER) +// Deal with missing scalbn on windows 
+#define scalbnf(_a, _i) ldexpf(_a, _i) +#define scalbn(_a, _i) ldexp(_a, _i) +#define scalbnl(_a, _i) ldexpl(_a, _i) +#endif + +float Abs_Error(float test, double reference); +float Ulp_Error(float test, double reference); +float Bruteforce_Ulp_Error_Double(double test, long double reference); + +int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, + cl_program *p, bool relaxedMode); +int MakeKernels(const char **c, cl_uint count, const char *name, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); + +// used to convert a bucket of bits into a search pattern through double +inline double DoubleFromUInt32(uint32_t bits) +{ + union { + uint64_t u; + double d; + } u; + + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t)(bits & ~0xfffU) << 32; + + // sign extend the leading bit of def segment as sign bit so that the middle + // region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + + // return result + return u.d; +} + +void _LogBuildError(cl_program p, int line, const char *file); +#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__) + +// The spec is fairly clear that we may enforce a hard cutoff to prevent +// premature flushing to zero. +// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + +// ulp_limit to be flushed to zero. 
+inline int IsFloatResultSubnormal(double x, float ulps) +{ + x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps; + return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); +} + +inline int IsFloatResultSubnormalAbsError(double x, float abs_err) +{ + x = x - abs_err; + return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); +} + +inline int IsDoubleResultSubnormal(long double x, float ulps) +{ + x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps; + return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022); +} + +inline int IsFloatInfinity(double x) +{ + union { + cl_float d; + cl_uint u; + } u; + u.d = (cl_float)x; + return ((u.u & 0x7fffffffU) == 0x7F800000U); +} + +inline int IsFloatMaxFloat(double x) +{ + union { + cl_float d; + cl_uint u; + } u; + u.d = (cl_float)x; + return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); +} + +inline int IsFloatNaN(double x) +{ + union { + cl_float d; + cl_uint u; + } u; + u.d = (cl_float)x; + return ((u.u & 0x7fffffffU) > 0x7F800000U); +} + +cl_uint RoundUpToNextPowerOfTwo(cl_uint x); + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. 
+ unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); +#elif defined(_WIN32) && defined(__INTEL_COMPILER) + // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* + // work on win.x64: > On the x64 architecture, changing the floating point + // precision is not supported. (Taken from + // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx) + int cw; + __asm { fnstcw cw } + ; // Get current value of FPU control word. + cw = cw & 0xfffffcff + | (3 << 8); // Set Precision Control to Double Extended Precision. + __asm { fldcw cw } + ; // Set new value of FPU control word. +#else + /* Implement for other platforms if needed */ +#endif +} + +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); + +union int32f_t { + int32_t i; + float f; +}; + +union int64d_t { + int64_t l; + double d; +}; + +void MulD(double *rhi, double *rlo, double u, double v); +void AddD(double *rhi, double *rlo, double a, double b); +void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, + double yl); +void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, + double yl); +void DivideDD(double *chi, double *clo, double a, double b); +int compareFloats(float x, float y); +int compareDoubles(double x, double y); + +void logFunctionInfo(const char *fname, unsigned int float_size, + unsigned int isFastRelaxed); + +float getAllowedUlpError(const Func *f, const bool relaxed); + +inline cl_uint getTestScale(size_t typeSize) +{ + if (gWimpyMode) + { + return (cl_uint)typeSize * 2 * gWimpyReductionFactor; + } + else if (gIsEmbedded) + { + return EMBEDDED_REDUCTION_FACTOR; + } + else + { + return 1; + } +} + +inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) +{ + if (gWimpyMode) + { + return (1ULL << 32) * gWimpyReductionFactor / (512); + } + 
else if (gIsEmbedded) + { + return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR; + } + else + { + return bufferSize / typeSize; + } +} + +#endif /* UTILITY_H */ diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h index 91b47abfce..1d3b47570e 100644 --- a/test_conformance/mem_host_flags/C_host_memory_block.h +++ b/test_conformance/mem_host_flags/C_host_memory_block.h @@ -69,31 +69,28 @@ C_host_memory_block::C_host_memory_block() template < class T> C_host_memory_block::~C_host_memory_block() { - if (pData!=NULL) delete pData; - num_elements = 0; + if (pData != NULL) delete[] pData; + num_elements = 0; } template < class T > void C_host_memory_block::Init(int num_elem, T & value) { - if (pData!=NULL) delete pData; - pData= new T [num_elem]; - for (int i=0; i void C_host_memory_block::Init(int num_elem) { - if (pData!=NULL) delete pData; - pData = new T [num_elem]; - for (int i=0; i void C_host_memory_block::Set_to_zero() diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp index 7543c6cd99..e7b486dbe9 100644 --- a/test_conformance/pipes/test_pipe_info.cpp +++ b/test_conformance/pipes/test_pipe_info.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "procs.h" +#include "harness/parseParameters.h" const char* pipe_kernel_code = { "__kernel void pipe_kernel(__write_only pipe int out_pipe)\n" @@ -39,8 +40,7 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if (pipe_width != returnVal) { - log_error("Error in clGetPipeInfo() check of pipe packet size\n"); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe packet size\n"); } else { @@ -52,29 +52,37 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if(pipe_depth != returnVal) { - log_error( "Error in clGetPipeInfo() check of pipe max packets\n" ); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe max packets\n"); } else { log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); } - err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info"); - test_error_ret(err, " Error creating program", -1); + err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel", + "-cl-std=CL2.0 -cl-kernel-arg-info"); + test_error_fail(err, "Error creating program"); cl_kernel_arg_type_qualifier arg_type_qualifier = 0; - cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; - err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL ); - test_error_ret(err, " clSetKernelArgInfo failed", -1); - err = (arg_type_qualifier != expected_type_qualifier); - - if(err) + err = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, + sizeof(arg_type_qualifier), &arg_type_qualifier, + NULL); + if (gCompilationMode == kOnline) { - print_error(err, "ERROR: Bad type qualifier\n"); - return -1; + test_error_fail(err, "clGetKernelArgInfo failed"); + if (arg_type_qualifier != CL_KERNEL_ARG_TYPE_PIPE) + { + test_fail("ERROR: Incorrect type qualifier: 
%i\n", + arg_type_qualifier); + } + } + else + { + test_failure_error_ret(err, CL_KERNEL_ARG_INFO_NOT_AVAILABLE, + "clGetKernelArgInfo error not as expected", + TEST_FAIL); } - return err; - + return TEST_PASS; } diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index 85247f8289..169ab80c35 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -218,9 +218,8 @@ int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_que const char *sources[] = { kernel_source.c_str() }; // Create producer kernel - err = create_single_kernel_helper_with_build_options( - context, &program, &kernel[0], 1, sources, kernelName[0], - "-cl-std=CL2.0"); + err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources, + kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel @@ -368,9 +367,8 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm const char *sources[] = { kernel_source.c_str() }; // Create producer kernel - err = create_single_kernel_helper_with_build_options( - context, &program, &kernel[0], 1, sources, kernelName[0], - "-cl-std=CL2.0"); + err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources, + kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel @@ -533,9 +531,8 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, const char *sources[] = { kernel_source.c_str() }; // Create producer kernel - err = create_single_kernel_helper_with_build_options( - context, &program, &kernel[0], 1, sources, kernelName[0], - "-cl-std=CL2.0"); + err = 
create_single_kernel_helper(context, &program, &kernel[0], 1, sources, + kernelName[0]); test_error_ret(err, " Error creating program", -1); // Create consumer kernel diff --git a/test_conformance/pipes/test_pipe_query_functions.cpp b/test_conformance/pipes/test_pipe_query_functions.cpp index f9c93aa230..21d195053a 100644 --- a/test_conformance/pipes/test_pipe_query_functions.cpp +++ b/test_conformance/pipes/test_pipe_query_functions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -140,7 +140,9 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm test_error_ret(err, " clCreatePipe failed", -1); // Create producer kernel - err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_query_functions_kernel_code, kernelName[0], "-cl-std=CL2.0"); + err = create_single_kernel_helper( + context, &program, &kernel[0], 1, + (const char **)&pipe_query_functions_kernel_code, kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index 64ee31b3f4..dd0d1216bb 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -531,9 +531,8 @@ int test_pipe_readwrite( cl_device_id deviceID, cl_context context, cl_command_q std::string kernel_source = sourceCode[i].str(); const char *sources[] = { kernel_source.c_str() }; // Create producer kernel - err = create_single_kernel_helper_with_build_options( - context, &program[i], &kernel[ii], 1, sources, kernelName[ii], - "-cl-std=CL2.0"); + err = create_single_kernel_helper(context, &program[i], &kernel[ii], 1, + sources, kernelName[ii]); test_error_ret(err, " Error creating program", -1); @@ -659,7 +658,8 @@ int test_pipe_readwrite_struct_generic( cl_device_id deviceID, cl_context contex test_error_ret(err, " clCreatePipe failed", -1); // Create producer kernel - err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, &kernelCode, kernelName[0], "-cl-std=CL2.0"); + err = create_single_kernel_helper(context, &program, &kernel[0], 1, + &kernelCode, kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel diff --git a/test_conformance/pipes/test_pipe_readwrite_errors.cpp b/test_conformance/pipes/test_pipe_readwrite_errors.cpp index 1b9fc31388..d4b4524876 100644 --- a/test_conformance/pipes/test_pipe_readwrite_errors.cpp +++ b/test_conformance/pipes/test_pipe_readwrite_errors.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -115,7 +115,9 @@ int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_com test_error_ret(err, " clCreatePipe failed", -1); // Create producer kernel - err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_readwrite_errors_kernel_code, kernelName[0], "-cl-std=CL2.0"); + err = create_single_kernel_helper( + context, &program, &kernel[0], 1, + (const char **)&pipe_readwrite_errors_kernel_code, kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp index b41170ca08..b3e17183d2 100644 --- a/test_conformance/pipes/test_pipe_subgroups.cpp +++ b/test_conformance/pipes/test_pipe_subgroups.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -146,7 +146,9 @@ int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl test_error_ret(err, " clCreatePipe failed", -1); // Create producer kernel - err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_subgroups_kernel_code, kernelName[0], "-cl-std=CL2.0"); + err = create_single_kernel_helper( + context, &program, &kernel[0], 1, + (const char **)&pipe_subgroups_kernel_code, kernelName[0]); test_error_ret(err, " Error creating program", -1); //Create consumer kernel diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index b169e6b9bc..2b804e402f 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -306,15 +306,22 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont if(allTestCase[testId]->_type == TYPE_VECTOR) { - err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, NULL); + err = create_single_kernel_helper( + context, &program, kernel_ptr, + sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname); } else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE) { - err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceAddrSpace) / sizeof(sourceAddrSpace[0]), sourceAddrSpace, NULL); + err = create_single_kernel_helper(context, &program, kernel_ptr, + sizeof(sourceAddrSpace) + / sizeof(sourceAddrSpace[0]), + sourceAddrSpace, testname); } else { - err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, NULL); + err = create_single_kernel_helper( + context, &program, kernel_ptr, + sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, testname); } if (!program || err) { @@ -322,12 +329,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont return NULL; } - 
*kernel_ptr = clCreateKernel(program, testname, &err); - if ( err ) { - log_error("clCreateKernel failed (%d)\n", err); - return NULL; - } - return program; } diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp index edfc043c06..0541bfa52a 100644 --- a/test_conformance/profiling/execute.cpp +++ b/test_conformance/profiling/execute.cpp @@ -21,6 +21,8 @@ #include #include +#include + #include "procs.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -29,12 +31,6 @@ typedef unsigned char uchar; #endif -#undef MIN -#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) - -#undef MAX -#define MAX(x,y) ( (x) > (y) ? (x) : (y) ) - //#define CREATE_OUTPUT 1 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize ); @@ -73,8 +69,8 @@ static const char *image_filter_src = static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb ) { // clamp the coords - int x0 = MIN( MAX( x, 0 ), w - 1 ); - int y0 = MIN( MAX( y, 0 ), h - 1 ); + int x0 = std::min(std::max(x, 0), w - 1); + int y0 = std::min(std::max(y, 0), h - 1); // get tine index int indx = ( y0 * w + x0 ) * nChannels; diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index 71c58bc2fb..f9641e9938 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -561,13 +561,18 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) const cl_uchar *c = (const cl_uchar *) correct; size_t i; - for(i = 0; i < count; i++) - if (t[i] != c[i]) { - log_error("\n(check_uchar) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); - return i + 1; - } - + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_uchar) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + 
"*0x%2.2x vs 0x%2.2x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -576,13 +581,18 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { const cl_char *c = (const cl_char *) correct; size_t i; - - for( i = 0; i < count; i++ ) - if( t[i] != c[i] ) { - log_error("\n(check_char) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_char) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%2.2x vs 0x%2.2x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -592,13 +602,18 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) const cl_ushort *c = (const cl_ushort *) correct; size_t i; - - for( i = 0; i < count; i++ ) - if(t[i] != c[i]) { - log_error("\n(check_ushort) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_ushort) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -608,13 +623,18 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) const cl_short *c = (const cl_short *) correct; size_t i; - - for (i = 0; i < count; i++) - if(t[i] != c[i]) { - log_error("\n(check_short) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + 
log_error("\n(check_short) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -624,14 +644,18 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { const cl_uint *c = (const cl_uint *) correct; size_t i; - - - for (i = 0; i < count; i++) - if(t[i] != c[i]) { - log_error("\n(check_uint) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_uint) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -641,24 +665,19 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { const cl_int *c = (const cl_int *) correct; size_t i; + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { - for(i = 0; i < count; i++) - if( t[i] != c[i] ) { - - log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); - log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i+1, count,c[i+1], t[i+1]); - log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i+2, count,c[i+2], t[i+2]); - log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i+3, count,c[i+3], t[i+3]); - if(i) { - log_error("\n(check_int) Error for vector size %ld found just after 0x%8.8lx: " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i-1, c[i-1], t[i-1]); + log_error("\n(check_int) 
Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; } - return i + 1; - } + } return 0; } @@ -668,13 +687,18 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) const cl_ulong *c = (const cl_ulong *) correct; size_t i; - - for( i = 0; i < count; i++ ) - if( t[i] != c[i] ) { - log_error("\n(check_ulong) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_ulong) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -684,13 +708,18 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { const cl_long *c = (const cl_long *) correct; size_t i; - - for(i = 0; i < count; i++ ) - if(t[i] != c[i]) { - log_error("\n(check_long) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) + if (t[i] != c[i]) + { + log_error("\n(check_long) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -700,14 +729,19 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size const cl_uint *c = (const cl_uint *) correct; size_t i; - - for( i = 0; i < count; i++ ) - /* Allow nans to be binary different */ - if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) { - log_error("\n(check_float) Error for vector size %ld found at 0x%8.8lx (of 
0x%8.8lx): " - "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) + { + log_error("\n(check_float) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } @@ -717,15 +751,20 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size const cl_ulong *c = (const cl_ulong *) correct; size_t i; - - - for( i = 0; i < count; i++ ) - /* Allow nans to be binary different */ - if ((t[i] != c[i]) && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) { - log_error("\n(check_double) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); - return i + 1; - } + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((double *)correct)[i]) + && isnan(((double *)test)[i]))) + { + log_error("\n(check_double) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } return 0; } diff --git a/test_conformance/spir/CMakeLists.txt b/test_conformance/spir/CMakeLists.txt index 70effa15a4..f65c03139f 100644 --- a/test_conformance/spir/CMakeLists.txt +++ b/test_conformance/spir/CMakeLists.txt @@ -9,7 +9,7 @@ set (SPIR_SOURCES run_build_test.cpp run_services.cpp kernelargs.cpp - ../math_brute_force/FunctionList.cpp + ../math_brute_force/function_list.cpp ) add_executable(${SPIR_OUT} diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 3a18988c70..06caf33bf7 100644 --- a/test_conformance/spir/main.cpp +++ 
b/test_conformance/spir/main.cpp @@ -6615,40 +6615,45 @@ struct sub_suite }; static const sub_suite spir_suites[] = { - {"api", "api", test_api}, - {"api_double", "api", test_api_double}, - {"atomics", "atomics", test_atomics}, - {"basic", "basic", test_basic}, - {"basic_double", "basic", test_basic_double}, - {"commonfns", "commonfns", test_commonfns}, - {"commonfns_double", "commonfns", test_commonfns_double}, - {"conversions", "conversions", test_conversions}, - {"conversions_double", "conversions", test_conversions_double}, - {"geometrics", "geometrics", test_geometrics}, - {"geometrics_double", "geometrics", test_geometrics_double}, - {"half", "half", test_half}, - {"half_double", "half", test_half_double}, - {"kernel_image_methods", "kernel_image_methods", test_kernel_image_methods}, - {"images_kernel_read_write", "images_kernel_read_write", test_images_kernel_read_write}, - {"images_samplerlessRead", "images_samplerlessRead", test_images_samplerless_read}, - {"integer_ops", "integer_ops", test_integer_ops}, - {"math_brute_force", "math_brute_force", test_math_brute_force}, - {"math_brute_force_double", "math_brute_force", test_math_brute_force_double}, - {"printf", "printf", test_printf}, - {"profiling", "profiling", test_profiling}, - {"relationals", "relationals", test_relationals}, - {"relationals_double", "relationals", test_relationals_double}, - {"select", "select", test_select}, - {"select_double", "select", test_select_double}, - {"vec_align", "vec_align", test_vec_align}, - {"vec_align_double", "vec_align", test_vec_align_double}, - {"vec_step", "vec_step", test_vec_step}, - {"vec_step_double", "vec_step", test_vec_step_double}, - {"compile_and_link", "compile_and_link", test_compile_and_link}, - {"sampler_enumeration", "sampler_enumeration", test_sampler_enumeration}, - {"enum_values", "enum_values", test_enum_values}, - {"kernel_attributes", "kernel_attributes", test_kernel_attributes}, - {"binary_type", "binary_type", test_binary_type}, + { 
"api", "api", test_api }, + { "api_double", "api", test_api_double }, + { "atomics", "atomics", test_atomics }, + { "basic", "basic", test_basic }, + { "basic_double", "basic", test_basic_double }, + { "commonfns", "commonfns", test_commonfns }, + { "commonfns_double", "commonfns", test_commonfns_double }, + { "conversions", "conversions", test_conversions }, + { "conversions_double", "conversions", test_conversions_double }, + { "geometrics", "geometrics", test_geometrics }, + { "geometrics_double", "geometrics", test_geometrics_double }, + { "half", "half", test_half }, + { "half_double", "half", test_half_double }, + { "kernel_image_methods", "kernel_image_methods", + test_kernel_image_methods }, + { "images_kernel_read_write", "images_kernel_read_write", + test_images_kernel_read_write }, + { "images_samplerlessRead", "images_samplerlessRead", + test_images_samplerless_read }, + { "integer_ops", "integer_ops", test_integer_ops }, + { "math_brute_force", "math_brute_force", test_math_brute_force }, + { "math_brute_force_double", "math_brute_force", + test_math_brute_force_double }, + { "printf", "printf", test_printf }, + { "profiling", "profiling", test_profiling }, + { "relationals", "relationals", test_relationals }, + { "relationals_double", "relationals", test_relationals_double }, + { "select", "select", test_select }, + { "select_double", "select", test_select_double }, + { "vec_align", "vec_align", test_vec_align }, + { "vec_align_double", "vec_align", test_vec_align_double }, + { "vec_step", "vec_step", test_vec_step }, + { "vec_step_double", "vec_step", test_vec_step_double }, + { "compile_and_link", "compile_and_link", test_compile_and_link }, + { "sampler_enumeration", "sampler_enumeration", test_sampler_enumeration }, + { "enum_values", "enum_values", test_enum_values }, + // {"kernel_attributes", "kernel_attributes", + // test_kernel_attributes}, // disabling temporarily, see GitHub #1284 + { "binary_type", "binary_type", test_binary_type }, }; 
diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp index cec2d27552..9264d3a48a 100644 --- a/test_conformance/spir/run_build_test.cpp +++ b/test_conformance/spir/run_build_test.cpp @@ -33,12 +33,12 @@ #include "harness/clImageHelper.h" #include "harness/os_helpers.h" +#include "../math_brute_force/function_list.h" +#include "datagen.h" #include "exceptions.h" #include "kernelargs.h" -#include "datagen.h" -#include "run_services.h" #include "run_build_test.h" -#include "../math_brute_force/FunctionList.h" +#include "run_services.h" #include // // Task diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt index 614d5a79fa..7500571d07 100644 --- a/test_conformance/spirv_new/CMakeLists.txt +++ b/test_conformance/spirv_new/CMakeLists.txt @@ -18,8 +18,8 @@ file(GLOB SPIRV_NEW_SOURCES "*.cpp") set(TEST_HARNESS_SOURCES ../../test_conformance/math_brute_force/reference_math.cpp - ../../test_conformance/math_brute_force/Utility.cpp - ) + ../../test_conformance/math_brute_force/utility.cpp +) set(${MODULE_NAME}_SOURCES ${SPIRV_NEW_SOURCES} ${TEST_HARNESS_SOURCES}) diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp index 84f8ed1f09..9e1789c27d 100644 --- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp +++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp @@ -129,13 +129,9 @@ int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, { // Run the cl kernel for reference results clProgramWrapper prog; - err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL); - SPIRV_CHECK_ERROR(err, "Failed to create cl program"); - - err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to build program"); - - clKernelWrapper kernel = 
clCreateKernel(prog, "fmath_cl", &err); + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &prog, &kernel, 1, + &kernelBuf, "fmath_cl"); SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp index 7250eb159f..bec0667ce6 100644 --- a/test_conformance/spirv_new/test_op_fmath.cpp +++ b/test_conformance/spirv_new/test_op_fmath.cpp @@ -89,13 +89,9 @@ int test_fmath(cl_device_id deviceID, { // Run the cl kernel for reference results clProgramWrapper prog; - err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL); - SPIRV_CHECK_ERROR(err, "Failed to create cl program"); - - err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to build program"); - - clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err); + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &prog, &kernel, 1, + &kernelBuf, "fmath_cl"); SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 99d71f726a..0a604bcf1a 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -82,15 +82,11 @@ int test_vector_times_scalar(cl_device_id deviceID, { // Run the cl kernel for reference results clProgramWrapper prog; - err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL); + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &prog, &kernel, 1, + &kernelBuf, "vector_times_scalar"); SPIRV_CHECK_ERROR(err, "Failed to create cl program"); - err = clBuildProgram(prog, 1, &deviceID, 
NULL, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to build program"); - - clKernelWrapper kernel = clCreateKernel(prog, "vector_times_scalar", &err); - SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); - clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, res_bytes, NULL, &err); SPIRV_CHECK_ERROR(err, "Failed to create ref buffer"); diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt index eb6a6079c2..d48af9ccfc 100644 --- a/test_conformance/subgroups/CMakeLists.txt +++ b/test_conformance/subgroups/CMakeLists.txt @@ -5,8 +5,16 @@ set(${MODULE_NAME}_SOURCES test_barrier.cpp test_queries.cpp test_workitem.cpp - test_workgroup.cpp + test_subgroup.cpp test_ifp.cpp + test_subgroup_extended_types.cpp + subgroup_common_kernels.cpp + test_subgroup_non_uniform_vote.cpp + test_subgroup_non_uniform_arithmetic.cpp + test_subgroup_ballot.cpp + test_subgroup_clustered_reduce.cpp + test_subgroup_shuffle.cpp + test_subgroup_shuffle_relative.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index f9a9a9d515..ebe9455836 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -19,20 +19,29 @@ #include #include "procs.h" #include "harness/testHarness.h" +#include "CL/cl_half.h" MTdata gMTdata; +cl_half_rounding_mode g_rounding_mode; test_definition test_list[] = { ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)), ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)), ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)), ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)), - ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)), - ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)), + ADD_TEST_VERSION(subgroup_functions_ext, Version(2, 0)), + ADD_TEST_VERSION(subgroup_functions_core, Version(2, 1)), ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)), ADD_TEST_VERSION(barrier_functions_core, 
Version(2, 1)), ADD_TEST_VERSION(ifp_ext, Version(2, 0)), - ADD_TEST_VERSION(ifp_core, Version(2, 1)) + ADD_TEST_VERSION(ifp_core, Version(2, 1)), + ADD_TEST(subgroup_functions_extended_types), + ADD_TEST(subgroup_functions_non_uniform_vote), + ADD_TEST(subgroup_functions_non_uniform_arithmetic), + ADD_TEST(subgroup_functions_ballot), + ADD_TEST(subgroup_functions_clustered_reduce), + ADD_TEST(subgroup_functions_shuffle), + ADD_TEST(subgroup_functions_shuffle_relative) }; const int test_num = ARRAY_SIZE(test_list); @@ -59,6 +68,22 @@ static test_status InitCL(cl_device_id device) ret = TEST_SKIP; } } + // Determine the rounding mode to be used in float to half conversions in + // init and reference code + const cl_device_fp_config fpConfig = get_default_rounding_mode(device); + + if (fpConfig == CL_FP_ROUND_TO_NEAREST) + { + g_rounding_mode = CL_HALF_RTE; + } + else if (fpConfig == CL_FP_ROUND_TO_ZERO && gIsEmbedded) + { + g_rounding_mode = CL_HALF_RTZ; + } + else + { + assert(false && "Unreachable"); + } return ret; } diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index 3ebb13b53e..d09e8242f6 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -37,14 +37,12 @@ extern int test_work_item_functions_core(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_work_group_functions_ext(cl_device_id device, - cl_context context, - cl_command_queue queue, - int num_elements); -extern int test_work_group_functions_core(cl_device_id device, - cl_context context, - cl_command_queue queue, - int num_elements); +extern int test_subgroup_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements); extern int test_barrier_functions_ext(cl_device_id device, cl_context context, 
cl_command_queue queue, int num_elements); extern int test_barrier_functions_core(cl_device_id device, cl_context context, @@ -56,5 +54,31 @@ extern int test_ifp_ext(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_ifp_core(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); - +extern int test_subgroup_functions_extended_types(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_non_uniform_vote(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_non_uniform_arithmetic( + cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_ballot(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_clustered_reduce(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_shuffle(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_subgroup_functions_shuffle_relative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); #endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp new file mode 100644 index 0000000000..33a51637d6 --- /dev/null +++ b/test_conformance/subgroups/subgroup_common_kernels.cpp @@ -0,0 +1,34 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "subgroup_common_kernels.h" + + +std::string sub_group_reduction_scan_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + out[gid] = %s(in[gid]); + } +)"; + +std::string sub_group_generic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + out[gid] = %s(x, xy[gid].z); + } +)"; \ No newline at end of file diff --git a/test_conformance/clcpp/reinterpret/main.cpp b/test_conformance/subgroups/subgroup_common_kernels.h similarity index 66% rename from test_conformance/clcpp/reinterpret/main.cpp rename to test_conformance/subgroups/subgroup_common_kernels.h index 06d7056f3e..bf2210ef3d 100644 --- a/test_conformance/clcpp/reinterpret/main.cpp +++ b/test_conformance/subgroups/subgroup_common_kernels.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,13 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "../common.hpp" +#ifndef SUBGROUPKERNELSOURCES_H +#define SUBGROUPKERNELSOURCES_H +#include "subhelpers.h" -#include "as_type.hpp" +extern std::string sub_group_reduction_scan_source; +extern std::string sub_group_generic_source; -int main(int argc, const char *argv[]) -{ - auto& tests = autotest::test_suite::global_test_suite().test_defs; - return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0); -} +#endif diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h new file mode 100644 index 0000000000..641c187585 --- /dev/null +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -0,0 +1,925 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef SUBGROUPCOMMONTEMPLATES_H +#define SUBGROUPCOMMONTEMPLATES_H + +#include "typeWrappers.h" +#include "CL/cl_half.h" +#include "subhelpers.h" +#include +#include +#include + +static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, + const std::string &mask_type, + cl_uint max_sub_group_size) +{ + bs128 mask128; + cl_uint4 mask; + cl_uint pos = subgroup_local_id; + if (mask_type == "eq") mask128.set(pos); + if (mask_type == "le" || mask_type == "lt") + { + for (cl_uint i = 0; i <= pos; i++) mask128.set(i); + if (mask_type == "lt") mask128.reset(pos); + } + if (mask_type == "ge" || mask_type == "gt") + { + for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i); + if (mask_type == "gt") mask128.reset(pos); + } + + // convert std::bitset<128> to uint4 + auto const uint_mask = bs128{ static_cast(-1) }; + mask.s0 = (mask128 & uint_mask).to_ulong(); + mask128 >>= 32; + mask.s1 = (mask128 & uint_mask).to_ulong(); + mask128 >>= 32; + mask.s2 = (mask128 & uint_mask).to_ulong(); + mask128 >>= 32; + mask.s3 = (mask128 & uint_mask).to_ulong(); + + return mask; +} + +// DESCRIPTION : +// sub_group_broadcast - each work_item registers its own value. +// All work_items in subgroup take one value from only one (any) work_item +// sub_group_broadcast_first - same as type 0.
All work_items in +// subgroup takes only one value from only one chosen (the smallest subgroup ID) +// work_item +// sub_group_non_uniform_broadcast - same as type 0 but +// only 4 work_items from subgroup enter the code (are active) +template struct BC +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int i, ii, j, k, n; + int ng = test_params.global_workgroup_size; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int nj = (nw + ns - 1) / ns; + int d = ns > 100 ? 100 : ns; + int non_uniform_size = ng % nw; + ng = ng / nw; + int last_subgroup_size = 0; + ii = 0; + + if (non_uniform_size) + { + ng++; + } + for (k = 0; k < ng; ++k) + { // for each work_group + if (non_uniform_size && k == ng - 1) + { + set_last_workgroup_params(non_uniform_size, nj, ns, nw, + last_subgroup_size); + } + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + if (last_subgroup_size && j == nj - 1) + { + n = last_subgroup_size; + } + else + { + n = ii + ns > nw ? nw - ii : ns; + } + int bcast_if = 0; + int bcast_elseif = 0; + int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff) + % (d > n ? 
n : d); + // l - calculate subgroup local id from which value will be + // broadcasted (one the same value for whole subgroup) + if (operation != SubgroupsBroadcastOp::broadcast) + { + // reduce broadcasting index in case of non_uniform and + // last workgroup last subgroup + if (last_subgroup_size && j == nj - 1 + && last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS) + { + bcast_if = bcast_index % last_subgroup_size; + bcast_elseif = bcast_if; + } + else + { + bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS; + // n - NR_OF_ACTIVE_WORK_ITEMS is zero when the subgroup + // holds exactly NR_OF_ACTIVE_WORK_ITEMS items; no work + // item index reaches the else-if slot in that case, so + // reuse bcast_if instead of taking a modulo by zero. + bcast_elseif = n > NR_OF_ACTIVE_WORK_ITEMS + ? NR_OF_ACTIVE_WORK_ITEMS + + bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS) + : bcast_if; + } + } + + for (i = 0; i < n; ++i) + { + if (operation == SubgroupsBroadcastOp::broadcast) + { + int midx = 4 * ii + 4 * i + 2; + m[midx] = (cl_int)bcast_index; + } + else + { + if (i < NR_OF_ACTIVE_WORK_ITEMS) + { + // index of the third + // element in the vector. + int midx = 4 * ii + 4 * i + 2; + // storing information about + // broadcasting index - + // earlier calculated + m[midx] = (cl_int)bcast_if; + } + else + { // index of the third + // element in the vector.
+ int midx = 4 * ii + 4 * i + 3; + m[midx] = (cl_int)bcast_elseif; + } + } + + // calculate value for broadcasting + cl_ulong number = genrand_int64(gMTdata); + set_value(t[ii + i], number); + } + } + // Now map into work group using map from device + for (j = 0; j < nw; ++j) + { // for each element in work_group + // calculate index as number of subgroup + // plus subgroup local id + x[j] = t[j]; + } + x += nw; + m += 4 * nw; + } + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, l, n; + int ng = test_params.global_workgroup_size; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int nj = (nw + ns - 1) / ns; + Ty tr, rr; + int non_uniform_size = ng % nw; + ng = ng / nw; + int last_subgroup_size = 0; + if (non_uniform_size) ng++; + + for (k = 0; k < ng; ++k) + { // for each work_group + if (non_uniform_size && k == ng - 1) + { + set_last_workgroup_params(non_uniform_size, nj, ns, nw, + last_subgroup_size); + } + for (j = 0; j < nw; ++j) + { // inside the work_group + mx[j] = x[j]; // read host inputs for work_group + my[j] = y[j]; // read device outputs for work_group + } + + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + if (last_subgroup_size && j == nj - 1) + { + n = last_subgroup_size; + } + else + { + n = ii + ns > nw ? nw - ii : ns; + } + + // Check result + if (operation == SubgroupsBroadcastOp::broadcast_first) + { + int lowest_active_id = -1; + for (i = 0; i < n; ++i) + { + + lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS + ? 
0 + : NR_OF_ACTIVE_WORK_ITEMS; + // findout if broadcasted + // value is the same + tr = mx[ii + lowest_active_id]; + // findout if broadcasted to all + rr = my[ii + i]; + + if (!compare(rr, tr)) + { + log_error( + "ERROR: sub_group_broadcast_first(%s) " + "mismatch " + "for local id %d in sub group %d in group " + "%d\n", + TypeManager::name(), i, j, k); + return TEST_FAIL; + } + } + } + else + { + for (i = 0; i < n; ++i) + { + if (operation == SubgroupsBroadcastOp::broadcast) + { + int midx = 4 * ii + 4 * i + 2; + l = (int)m[midx]; + tr = mx[ii + l]; + } + else + { + if (i < NR_OF_ACTIVE_WORK_ITEMS) + { // take index of array where info + // which work_item will be + // broadcast its value is stored + int midx = 4 * ii + 4 * i + 2; + // take subgroup local id of + // this work_item + l = (int)m[midx]; + // take value generated on host + // for this work_item + tr = mx[ii + l]; + } + else + { + int midx = 4 * ii + 4 * i + 3; + l = (int)m[midx]; + tr = mx[ii + l]; + } + } + rr = my[ii + i]; // read device outputs for + // work_item in the subgroup + + if (!compare(rr, tr)) + { + log_error("ERROR: sub_group_%s(%s) " + "mismatch for local id %d in sub " + "group %d in group %d - got %lu " + "expected %lu\n", + operation_names(operation), + TypeManager::name(), i, j, k, rr, tr); + return TEST_FAIL; + } + } + } + } + x += nw; + y += nw; + m += 4 * nw; + } + return TEST_PASS; + } +}; + +static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); } + +static subgroups::cl_half to_half(float x) +{ + subgroups::cl_half value; + value.data = cl_half_from_float(x, g_rounding_mode); + return value; +} + +// for integer types +template inline Ty calculate(Ty a, Ty b, ArithmeticOp operation) +{ + switch (operation) + { + case ArithmeticOp::add_: return a + b; + case ArithmeticOp::max_: return a > b ? a : b; + case ArithmeticOp::min_: return a < b ? 
a : b; + case ArithmeticOp::mul_: return a * b; + case ArithmeticOp::and_: return a & b; + case ArithmeticOp::or_: return a | b; + case ArithmeticOp::xor_: return a ^ b; + case ArithmeticOp::logical_and: return a && b; + case ArithmeticOp::logical_or: return a || b; + case ArithmeticOp::logical_xor: return !a ^ !b; + default: log_error("Unknown operation request"); break; + } + return 0; +} +// Specialize for floating points. +template <> +inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation) +{ + switch (operation) + { + case ArithmeticOp::add_: { + return a + b; + } + case ArithmeticOp::max_: { + return a > b ? a : b; + } + case ArithmeticOp::min_: { + return a < b ? a : b; + } + case ArithmeticOp::mul_: { + return a * b; + } + default: log_error("Unknown operation request"); break; + } + return 0; +} + +template <> +inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation) +{ + switch (operation) + { + case ArithmeticOp::add_: { + return a + b; + } + case ArithmeticOp::max_: { + return a > b ? a : b; + } + case ArithmeticOp::min_: { + return a < b ? a : b; + } + case ArithmeticOp::mul_: { + return a * b; + } + default: log_error("Unknown operation request"); break; + } + return 0; +} + +template <> +inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b, + ArithmeticOp operation) +{ + switch (operation) + { + case ArithmeticOp::add_: return to_half(to_float(a) + to_float(b)); + case ArithmeticOp::max_: + return to_float(a) > to_float(b) || is_half_nan(b.data) ? a : b; + case ArithmeticOp::min_: + return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b; + case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b)); + default: log_error("Unknown operation request"); break; + } + return to_half(0); +} + +template bool is_floating_point() +{ + return std::is_floating_point::value + || std::is_same::value; +} + +// limit possible input values to avoid arithmetic rounding/overflow issues. 
+// the first work items of a subgroup get distinct values (2..6), +// the remaining work items get 1, +// and the values are then shuffled +static void fill_and_shuffle_safe_values(std::vector &safe_values, + int sb_size) +{ + // max product is 720, cl_half has enough precision for it + const std::vector non_one_values{ 2, 3, 4, 5, 6 }; + + if (sb_size <= non_one_values.size()) + { + safe_values.assign(non_one_values.begin(), + non_one_values.begin() + sb_size); + } + else + { + safe_values.assign(sb_size, 1); + std::copy(non_one_values.begin(), non_one_values.end(), + safe_values.begin()); + } + + std::mt19937 mersenne_twister_engine(10000); + std::shuffle(safe_values.begin(), safe_values.end(), + mersenne_twister_engine); +} + +template +void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) +{ + int nj = (nw + ns - 1) / ns; + + std::vector safe_values; + if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_) + { + fill_and_shuffle_safe_values(safe_values, ns); + } + + for (int k = 0; k < ng; ++k) + { + for (int j = 0; j < nj; ++j) + { + int ii = j * ns; + int n = ii + ns > nw ?
nw - ii : ns; + + for (int i = 0; i < n; ++i) + { + cl_ulong out_value; + if (operation == ArithmeticOp::mul_ + || operation == ArithmeticOp::add_) + { + out_value = safe_values[i]; + } + else + { + out_value = genrand_int64(gMTdata) % (32 * n); + if ((operation == ArithmeticOp::logical_and + || operation == ArithmeticOp::logical_or + || operation == ArithmeticOp::logical_xor) + && ((out_value >> 32) & 1) == 0) + out_value = 0; // increase probability of false + } + set_value(t[ii + i], out_value); + } + } + + // Now map into work group using map from device + for (int j = 0; j < nw; ++j) + { + x[j] = t[j]; + } + + x += nw; + m += 4 * nw; + } +} + +template struct SHF +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int i, ii, j, k, l, n, delta; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + int nj = (nw + ns - 1) / ns; + int d = ns > 100 ? 100 : ns; + ii = 0; + ng = ng / nw; + for (k = 0; k < ng; ++k) + { // for each work_group + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + n = ii + ns > nw ? nw - ii : ns; + for (i = 0; i < n; ++i) + { + int midx = 4 * ii + 4 * i + 2; + l = (int)(genrand_int32(gMTdata) & 0x7fffffff) + % (d > n ? 
n : d); + switch (operation) + { + case ShuffleOp::shuffle: + case ShuffleOp::shuffle_xor: + // storing information about shuffle index + m[midx] = (cl_int)l; + break; + case ShuffleOp::shuffle_up: + delta = l; // calculate delta for shuffle up + if (i - delta < 0) + { + delta = i; + } + m[midx] = (cl_int)delta; + break; + case ShuffleOp::shuffle_down: + delta = l; // calculate delta for shuffle down + if (i + delta >= n) + { + delta = n - 1 - i; + } + m[midx] = (cl_int)delta; + break; + default: break; + } + cl_ulong number = genrand_int64(gMTdata); + set_value(t[ii + i], number); + } + } + // Now map into work group using map from device + for (j = 0; j < nw; ++j) + { // for each element in work_group + x[j] = t[j]; + } + x += nw; + m += 4 * nw; + } + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, l, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + int nj = (nw + ns - 1) / ns; + Ty tr, rr; + ng = ng / nw; + + for (k = 0; k < ng; ++k) + { // for each work_group + for (j = 0; j < nw; ++j) + { // inside the work_group + mx[j] = x[j]; // read host inputs for work_group + my[j] = y[j]; // read device outputs for work_group + } + + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + n = ii + ns > nw ? 
nw - ii : ns; + + for (i = 0; i < n; ++i) + { // inside the subgroup + // shuffle index storage + int midx = 4 * ii + 4 * i + 2; + l = (int)m[midx]; + rr = my[ii + i]; + switch (operation) + { + // shuffle basic - treat l as index + case ShuffleOp::shuffle: tr = mx[ii + l]; break; + // shuffle up - treat l as delta + case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break; + // shuffle down - treat l as delta + case ShuffleOp::shuffle_down: + tr = mx[ii + i + l]; + break; + // shuffle xor - treat l as mask + case ShuffleOp::shuffle_xor: + tr = mx[ii + (i ^ l)]; + break; + default: break; + } + + if (!compare(rr, tr)) + { + log_error("ERROR: sub_group_%s(%s) mismatch for " + "local id %d in sub group %d in group %d\n", + operation_names(operation), + TypeManager::name(), i, j, k); + return TEST_FAIL; + } + } + } + x += nw; + y += nw; + m += 4 * nw; + } + return TEST_PASS; + } +}; + +template struct SCEX_NU +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ?
"sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + ng = ng / nw; + generate_inputs(x, t, m, ns, nw, ng); + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + bs128 work_items_mask = test_params.work_items_mask; + int nj = (nw + ns - 1) / ns; + Ty tr, rr; + ng = ng / nw; + + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); + + // for uniform case take into consideration all workitems + if (!work_items_mask.any()) + { + work_items_mask.set(); + } + for (k = 0; k < ng; ++k) + { // for each work_group + // Map to array indexed to array indexed by local ID and sub group + for (j = 0; j < nw; ++j) + { // inside the work_group + mx[j] = x[j]; // read host inputs for work_group + my[j] = y[j]; // read device outputs for work_group + } + for (j = 0; j < nj; ++j) + { + ii = j * ns; + n = ii + ns > nw ? 
nw - ii : ns; + std::set active_work_items; + for (i = 0; i < n; ++i) + { + if (work_items_mask.test(i)) + { + active_work_items.insert(i); + } + } + if (active_work_items.empty()) + { + continue; + } + else + { + tr = TypeManager::identify_limits(operation); + for (const int &active_work_item : active_work_items) + { + rr = my[ii + active_work_item]; + if (!compare_ordered(rr, tr)) + { + log_error( + "ERROR: %s_%s(%s) " + "mismatch for local id %d in sub group %d in " + "group %d Expected: %d Obtained: %d\n", + func_name.c_str(), operation_names(operation), + TypeManager::name(), active_work_item, j, k, tr, rr); + return TEST_FAIL; + } + tr = calculate(tr, mx[ii + active_work_item], + operation); + } + } + } + x += nw; + y += nw; + m += 4 * nw; + } + + return TEST_PASS; + } +}; + +// Test for scan inclusive non uniform functions +template struct SCIN_NU +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + ng = ng / nw; + generate_inputs(x, t, m, ns, nw, ng); + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + bs128 work_items_mask = test_params.work_items_mask; + + int nj = (nw + ns - 1) / ns; + Ty tr, rr; + ng = ng / nw; + + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ?
"sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); + + // for uniform case take into consideration all workitems + if (!work_items_mask.any()) + { + work_items_mask.set(); + } + // std::bitset<32> mask32(use_work_items_mask); + // for (int k) mask32.count(); + for (k = 0; k < ng; ++k) + { // for each work_group + // Map to array indexed to array indexed by local ID and sub group + for (j = 0; j < nw; ++j) + { // inside the work_group + mx[j] = x[j]; // read host inputs for work_group + my[j] = y[j]; // read device outputs for work_group + } + for (j = 0; j < nj; ++j) + { + ii = j * ns; + n = ii + ns > nw ? nw - ii : ns; + std::set active_work_items; + int catch_frist_active = -1; + + for (i = 0; i < n; ++i) + { + if (work_items_mask.test(i)) + { + if (catch_frist_active == -1) + { + catch_frist_active = i; + } + active_work_items.insert(i); + } + } + if (active_work_items.empty()) + { + continue; + } + else + { + tr = TypeManager::identify_limits(operation); + for (const int &active_work_item : active_work_items) + { + rr = my[ii + active_work_item]; + if (active_work_items.size() == 1) + { + tr = mx[ii + catch_frist_active]; + } + else + { + tr = calculate(tr, mx[ii + active_work_item], + operation); + } + if (!compare_ordered(rr, tr)) + { + log_error( + "ERROR: %s_%s(%s) " + "mismatch for local id %d in sub group %d " + "in " + "group %d Expected: %d Obtained: %d\n", + func_name.c_str(), operation_names(operation), + TypeManager::name(), active_work_item, j, k, + tr, rr); + return TEST_FAIL; + } + } + } + } + x += nw; + y += nw; + m += 4 * nw; + } + + return TEST_PASS; + } +}; + +// Test for reduce non uniform functions +template struct RED_NU +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? 
"sub_group_non_uniform_reduce" + : "sub_group_reduce"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + ng = ng / nw; + generate_inputs(x, t, m, ns, nw, ng); + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + bs128 work_items_mask = test_params.work_items_mask; + int nj = (nw + ns - 1) / ns; + ng = ng / nw; + Ty tr, rr; + + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_reduce" + : "sub_group_reduce"); + + for (k = 0; k < ng; ++k) + { + // Map to array indexed to array indexed by local ID and sub + // group + for (j = 0; j < nw; ++j) + { + mx[j] = x[j]; + my[j] = y[j]; + } + + if (!work_items_mask.any()) + { + work_items_mask.set(); + } + + for (j = 0; j < nj; ++j) + { + ii = j * ns; + n = ii + ns > nw ? 
nw - ii : ns; + std::set active_work_items; + int catch_frist_active = -1; + for (i = 0; i < n; ++i) + { + if (work_items_mask.test(i)) + { + if (catch_frist_active == -1) + { + catch_frist_active = i; + tr = mx[ii + i]; + active_work_items.insert(i); + continue; + } + active_work_items.insert(i); + tr = calculate(tr, mx[ii + i], operation); + } + } + + if (active_work_items.empty()) + { + continue; + } + + for (const int &active_work_item : active_work_items) + { + rr = my[ii + active_work_item]; + if (!compare_ordered(rr, tr)) + { + log_error("ERROR: %s_%s(%s) " + "mismatch for local id %d in sub group %d in " + "group %d Expected: %d Obtained: %d\n", + func_name.c_str(), operation_names(operation), + TypeManager::name(), active_work_item, j, + k, tr, rr); + return TEST_FAIL; + } + } + } + x += nw; + y += nw; + m += 4 * nw; + } + + return TEST_PASS; + } +}; + +#endif diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 6e84ccb37f..153045d08c 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -19,13 +19,334 @@ #include "testHarness.h" #include "kernelHelpers.h" #include "typeWrappers.h" +#include "imageHelpers.h" #include #include +#include +#include +#include +#include + +#define NR_OF_ACTIVE_WORK_ITEMS 4 + +extern MTdata gMTdata; +typedef std::bitset<128> bs128; +extern cl_half_rounding_mode g_rounding_mode; + +static bs128 cl_uint4_to_bs128(cl_uint4 v) +{ + return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64) + | (bs128(v.s3) << 96); +} + +static cl_uint4 bs128_to_cl_uint4(bs128 v) +{ + bs128 bs128_ffffffff = 0xffffffffU; + + cl_uint4 r; + r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong(); + r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong(); + r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong(); + r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong(); + + return r; +} + +struct WorkGroupParams +{ + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1) + : 
global_workgroup_size(gws), local_workgroup_size(lws), + divergence_mask_arg(dm_arg) + { + subgroup_size = 0; + work_items_mask = 0; + use_core_subgroups = true; + dynsc = 0; + load_masks(); + } + size_t global_workgroup_size; + size_t local_workgroup_size; + size_t subgroup_size; + bs128 work_items_mask; + int dynsc; + bool use_core_subgroups; + std::vector all_work_item_masks; + int divergence_mask_arg; + void save_kernel_source(const std::string &source, std::string name = "") + { + if (name == "") + { + name = "default"; + } + if (kernel_function_name.find(name) != kernel_function_name.end()) + { + log_info("Kernel definition duplication. Source will be " + "overwritten for function name %s", + name.c_str()); + } + kernel_function_name[name] = source; + }; + // return specific defined kernel or default. + std::string get_kernel_source(std::string name) + { + if (kernel_function_name.find(name) == kernel_function_name.end()) + { + return kernel_function_name["default"]; + } + return kernel_function_name[name]; + } + + +private: + std::map kernel_function_name; + void load_masks() + { + if (divergence_mask_arg != -1) + { + // 1 in string will be set 1, 0 will be set 0 + bs128 mask_0xf0f0f0f0("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xf0f0f0f0); + // 1 in string will be set 0, 0 will be set 1 + bs128 mask_0x0f0f0f0f("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0x0f0f0f0f); + bs128 mask_0x5555aaaa("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x5555aaaa); + bs128 
mask_0xaaaa5555("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0xaaaa5555); + // 0x0f0ff0f0 + bs128 mask_0x0f0ff0f0("00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x0f0ff0f0); + // 0xff0000ff + bs128 mask_0xff0000ff("11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff0000ff); + // 0xff00ff00 + bs128 mask_0xff00ff00("11111111000000001111111100000000" + "11111111000000001111111100000000" + "11111111000000001111111100000000" + "11111111000000001111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff00ff00); + // 0x00ffff00 + bs128 mask_0x00ffff00("00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x00ffff00); + // 0x80 1 workitem highest id for 8 subgroup size + bs128 mask_0x80808080("10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x80808080); + // 0x8000 1 workitem highest id for 16 subgroup size + bs128 mask_0x80008000("10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80008000); + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x80000000("10000000000000000000000000000000" + "10000000000000000000000000000000" + 
"10000000000000000000000000000000" + "10000000000000000000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80000000); + // 0x80000000 00000000 1 workitem highest id for 64 subgroup size + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x8000000000000000("10000000000000000000000000000000" + "00000000000000000000000000000000" + "10000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x8000000000000000); + // 0x80000000 00000000 00000000 00000000 1 workitem highest id for + // 128 subgroup size + bs128 mask_0x80000000000000000000000000000000( + "10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back( + mask_0x80000000000000000000000000000000); + + bs128 mask_0xffffffff("11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xffffffff); + } + } +}; + +enum class SubgroupsBroadcastOp +{ + broadcast, + broadcast_first, + non_uniform_broadcast +}; + +enum class NonUniformVoteOp +{ + elect, + all, + any, + all_equal +}; + +enum class BallotOp +{ + ballot, + inverse_ballot, + ballot_bit_extract, + ballot_bit_count, + ballot_inclusive_scan, + ballot_exclusive_scan, + ballot_find_lsb, + ballot_find_msb, + eq_mask, + ge_mask, + gt_mask, + le_mask, + lt_mask, +}; + +enum class ShuffleOp +{ + shuffle, + shuffle_up, + shuffle_down, + shuffle_xor +}; + +enum class ArithmeticOp +{ + add_, + max_, + min_, + mul_, + and_, + or_, + xor_, + logical_and, + logical_or, + logical_xor +}; + +static const char *const operation_names(ArithmeticOp operation) +{ + switch (operation) + { + case ArithmeticOp::add_: return "add"; + case ArithmeticOp::max_: return "max"; + case ArithmeticOp::min_: 
return "min"; + case ArithmeticOp::mul_: return "mul"; + case ArithmeticOp::and_: return "and"; + case ArithmeticOp::or_: return "or"; + case ArithmeticOp::xor_: return "xor"; + case ArithmeticOp::logical_and: return "logical_and"; + case ArithmeticOp::logical_or: return "logical_or"; + case ArithmeticOp::logical_xor: return "logical_xor"; + default: log_error("Unknown operation request"); break; + } + return ""; +} + +static const char *const operation_names(BallotOp operation) +{ + switch (operation) + { + case BallotOp::ballot: return "ballot"; + case BallotOp::inverse_ballot: return "inverse_ballot"; + case BallotOp::ballot_bit_extract: return "bit_extract"; + case BallotOp::ballot_bit_count: return "bit_count"; + case BallotOp::ballot_inclusive_scan: return "inclusive_scan"; + case BallotOp::ballot_exclusive_scan: return "exclusive_scan"; + case BallotOp::ballot_find_lsb: return "find_lsb"; + case BallotOp::ballot_find_msb: return "find_msb"; + case BallotOp::eq_mask: return "eq"; + case BallotOp::ge_mask: return "ge"; + case BallotOp::gt_mask: return "gt"; + case BallotOp::le_mask: return "le"; + case BallotOp::lt_mask: return "lt"; + default: log_error("Unknown operation request"); break; + } + return ""; +} + +static const char *const operation_names(ShuffleOp operation) +{ + switch (operation) + { + case ShuffleOp::shuffle: return "shuffle"; + case ShuffleOp::shuffle_up: return "shuffle_up"; + case ShuffleOp::shuffle_down: return "shuffle_down"; + case ShuffleOp::shuffle_xor: return "shuffle_xor"; + default: log_error("Unknown operation request"); break; + } + return ""; +} + +static const char *const operation_names(NonUniformVoteOp operation) +{ + switch (operation) + { + case NonUniformVoteOp::all: return "all"; + case NonUniformVoteOp::all_equal: return "all_equal"; + case NonUniformVoteOp::any: return "any"; + case NonUniformVoteOp::elect: return "elect"; + default: log_error("Unknown operation request"); break; + } + return ""; +} + +static const 
char *const operation_names(SubgroupsBroadcastOp operation) +{ + switch (operation) + { + case SubgroupsBroadcastOp::broadcast: return "broadcast"; + case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first"; + case SubgroupsBroadcastOp::non_uniform_broadcast: + return "non_uniform_broadcast"; + default: log_error("Unknown operation request"); break; + } + return ""; +} class subgroupsAPI { public: - subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups) + subgroupsAPI(cl_platform_id platform, bool use_core_subgroups) { static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, @@ -33,7 +354,7 @@ class subgroupsAPI { static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, "Enums have to be the same"); - if (useCoreSubgroups) + if (use_core_subgroups) { _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo; clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo"; @@ -56,220 +377,950 @@ class subgroupsAPI { clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr; }; -// Some template helpers -template struct TypeName; -template <> struct TypeName +// Need to defined custom type for vector size = 3 and half type. 
This is +// because of 3-component types are otherwise indistinguishable from the +// 4-component types, and because the half type is indistinguishable from some +// other 16-bit type (ushort) +namespace subgroups { +struct cl_char3 { - static const char *val() { return "half"; } + ::cl_char3 data; }; -template <> struct TypeName +struct cl_uchar3 { - static const char *val() { return "uint"; } + ::cl_uchar3 data; }; -template <> struct TypeName +struct cl_short3 { - static const char *val() { return "int"; } + ::cl_short3 data; }; -template <> struct TypeName +struct cl_ushort3 { - static const char *val() { return "ulong"; } + ::cl_ushort3 data; }; -template <> struct TypeName +struct cl_int3 { - static const char *val() { return "long"; } + ::cl_int3 data; }; -template <> struct TypeName +struct cl_uint3 { - static const char *val() { return "float"; } + ::cl_uint3 data; }; -template <> struct TypeName +struct cl_long3 { - static const char *val() { return "double"; } + ::cl_long3 data; }; - -template struct TypeDef; -template <> struct TypeDef +struct cl_ulong3 +{ + ::cl_ulong3 data; +}; +struct cl_float3 +{ + ::cl_float3 data; +}; +struct cl_double3 +{ + ::cl_double3 data; +}; +struct cl_half { - static const char *val() { return "typedef half Type;\n"; } + ::cl_half data; }; -template <> struct TypeDef +struct cl_half2 { - static const char *val() { return "typedef uint Type;\n"; } + ::cl_half2 data; }; -template <> struct TypeDef +struct cl_half3 { - static const char *val() { return "typedef int Type;\n"; } + ::cl_half3 data; }; -template <> struct TypeDef +struct cl_half4 { - static const char *val() { return "typedef ulong Type;\n"; } + ::cl_half4 data; }; -template <> struct TypeDef +struct cl_half8 { - static const char *val() { return "typedef long Type;\n"; } + ::cl_half8 data; }; -template <> struct TypeDef +struct cl_half16 { - static const char *val() { return "typedef float Type;\n"; } + ::cl_half16 data; }; -template <> struct TypeDef +} + 
+static bool int64_ok(cl_device_id device) +{ + char profile[128]; + int error; + + error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), + (void *)&profile, NULL); + if (error) + { + log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); + return false; + } + + if (strcmp(profile, "EMBEDDED_PROFILE") == 0) + return is_extension_available(device, "cles_khr_int64"); + + return true; +} + +static bool double_ok(cl_device_id device) { - static const char *val() { return "typedef double Type;\n"; } + int error; + cl_device_fp_config c; + error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), + (void *)&c, NULL); + if (error) + { + log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); + return false; + } + return c != 0; +} + +static bool half_ok(cl_device_id device) +{ + int error; + cl_device_fp_config c; + error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(c), + (void *)&c, NULL); + if (error) + { + log_info("clGetDeviceInfo failed with CL_DEVICE_HALF_FP_CONFIG\n"); + return false; + } + return c != 0; +} + +template struct CommonTypeManager +{ + + static const char *name() { return ""; } + static const char *add_typedef() { return "\n"; } + typedef std::false_type is_vector_type; + typedef std::false_type is_sb_vector_size3; + typedef std::false_type is_sb_vector_type; + typedef std::false_type is_sb_scalar_type; + static const bool type_supported(cl_device_id) { return true; } + static const Ty identify_limits(ArithmeticOp operation) + { + switch (operation) + { + case ArithmeticOp::add_: return (Ty)0; + case ArithmeticOp::max_: return (std::numeric_limits::min)(); + case ArithmeticOp::min_: return (std::numeric_limits::max)(); + case ArithmeticOp::mul_: return (Ty)1; + case ArithmeticOp::and_: return (Ty)~0; + case ArithmeticOp::or_: return (Ty)0; + case ArithmeticOp::xor_: return (Ty)0; + default: log_error("Unknown operation request"); break; + } + return 0; + } }; -template struct TypeIdentity; 
-// template <> struct TypeIdentity { static cl_half val() { return -// (cl_half)0.0; } }; template <> struct TypeIdentity { static -// cl_half val() { return -(cl_half)65536.0; } }; template <> struct -// TypeIdentity { static cl_half val() { return (cl_half)65536.0; } -// }; +template struct TypeManager; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_uint val() { return (cl_uint)0; } + static const char *name() { return "int"; } + static const char *add_typedef() { return "typedef int Type;\n"; } + static cl_int identify_limits(ArithmeticOp operation) + { + switch (operation) + { + case ArithmeticOp::add_: return (cl_int)0; + case ArithmeticOp::max_: + return (std::numeric_limits::min)(); + case ArithmeticOp::min_: + return (std::numeric_limits::max)(); + case ArithmeticOp::mul_: return (cl_int)1; + case ArithmeticOp::and_: return (cl_int)~0; + case ArithmeticOp::or_: return (cl_int)0; + case ArithmeticOp::xor_: return (cl_int)0; + case ArithmeticOp::logical_and: return (cl_int)1; + case ArithmeticOp::logical_or: return (cl_int)0; + case ArithmeticOp::logical_xor: return (cl_int)0; + default: log_error("Unknown operation request"); break; + } + return 0; + } }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_uint val() { return (cl_uint)0; } + static const char *name() { return "int2"; } + static const char *add_typedef() { return "typedef int2 Type;\n"; } + typedef std::true_type is_vector_type; + using scalar_type = cl_int; }; -template <> struct TypeIdentity +template <> +struct TypeManager + : public CommonTypeManager { - static cl_uint val() { return (cl_uint)0xffffffff; } + static const char *name() { return "int3"; } + static const char *add_typedef() { return "typedef int3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_int; }; - -template <> struct TypeIdentity +template <> struct TypeManager : public 
CommonTypeManager { - static cl_int val() { return (cl_int)0; } + static const char *name() { return "int4"; } + static const char *add_typedef() { return "typedef int4 Type;\n"; } + using scalar_type = cl_int; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_int val() { return (cl_int)0x80000000; } + static const char *name() { return "int8"; } + static const char *add_typedef() { return "typedef int8 Type;\n"; } + using scalar_type = cl_int; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_int val() { return (cl_int)0x7fffffff; } + static const char *name() { return "int16"; } + static const char *add_typedef() { return "typedef int16 Type;\n"; } + using scalar_type = cl_int; + typedef std::true_type is_vector_type; }; - -template <> struct TypeIdentity +// cl_uint +template <> struct TypeManager : public CommonTypeManager { - static cl_ulong val() { return (cl_ulong)0; } + static const char *name() { return "uint"; } + static const char *add_typedef() { return "typedef uint Type;\n"; } }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_ulong val() { return (cl_ulong)0; } + static const char *name() { return "uint2"; } + static const char *add_typedef() { return "typedef uint2 Type;\n"; } + using scalar_type = cl_uint; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> +struct TypeManager + : public CommonTypeManager { - static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; } + static const char *name() { return "uint3"; } + static const char *add_typedef() { return "typedef uint3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_uint; }; - -template <> struct TypeIdentity +template <> struct TypeManager : public 
CommonTypeManager { - static cl_long val() { return (cl_long)0; } + static const char *name() { return "uint4"; } + static const char *add_typedef() { return "typedef uint4 Type;\n"; } + using scalar_type = cl_uint; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_long val() { return (cl_long)0x8000000000000000ULL; } + static const char *name() { return "uint8"; } + static const char *add_typedef() { return "typedef uint8 Type;\n"; } + using scalar_type = cl_uint; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } + static const char *name() { return "uint16"; } + static const char *add_typedef() { return "typedef uint16 Type;\n"; } + using scalar_type = cl_uint; + typedef std::true_type is_vector_type; +}; +// cl_short +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "short"; } + static const char *add_typedef() { return "typedef short Type;\n"; } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "short2"; } + static const char *add_typedef() { return "typedef short2 Type;\n"; } + using scalar_type = cl_short; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "short3"; } + static const char *add_typedef() { return "typedef short3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_short; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "short4"; } + static const char *add_typedef() { return "typedef short4 Type;\n"; } + using scalar_type = cl_short; + typedef std::true_type is_vector_type; +}; +template <> struct TypeManager : public 
CommonTypeManager +{ + static const char *name() { return "short8"; } + static const char *add_typedef() { return "typedef short8 Type;\n"; } + using scalar_type = cl_short; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "short16"; } + static const char *add_typedef() { return "typedef short16 Type;\n"; } + using scalar_type = cl_short; + typedef std::true_type is_vector_type; +}; +// cl_ushort +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ushort"; } + static const char *add_typedef() { return "typedef ushort Type;\n"; } +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ushort2"; } + static const char *add_typedef() { return "typedef ushort2 Type;\n"; } + using scalar_type = cl_ushort; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "ushort3"; } + static const char *add_typedef() { return "typedef ushort3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_ushort; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ushort4"; } + static const char *add_typedef() { return "typedef ushort4 Type;\n"; } + using scalar_type = cl_ushort; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ushort8"; } + static const char *add_typedef() { return "typedef ushort8 Type;\n"; } + using scalar_type = cl_ushort; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ushort16"; } + static const char *add_typedef() { return "typedef ushort16 Type;\n"; } + using scalar_type = cl_ushort; + typedef std::true_type 
is_vector_type; +}; +// cl_char +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "char"; } + static const char *add_typedef() { return "typedef char Type;\n"; } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "char2"; } + static const char *add_typedef() { return "typedef char2 Type;\n"; } + using scalar_type = cl_char; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "char3"; } + static const char *add_typedef() { return "typedef char3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_char; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "char4"; } + static const char *add_typedef() { return "typedef char4 Type;\n"; } + using scalar_type = cl_char; + typedef std::true_type is_vector_type; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "char8"; } + static const char *add_typedef() { return "typedef char8 Type;\n"; } + using scalar_type = cl_char; + typedef std::true_type is_vector_type; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "char16"; } + static const char *add_typedef() { return "typedef char16 Type;\n"; } + using scalar_type = cl_char; + typedef std::true_type is_vector_type; +}; +// cl_uchar +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "uchar"; } + static const char *add_typedef() { return "typedef uchar Type;\n"; } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "uchar2"; } + static const char *add_typedef() { return "typedef uchar2 Type;\n"; } + using scalar_type = cl_uchar; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager 
+ : public CommonTypeManager +{ + static const char *name() { return "uchar3"; } + static const char *add_typedef() { return "typedef uchar3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_uchar; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "uchar4"; } + static const char *add_typedef() { return "typedef uchar4 Type;\n"; } + using scalar_type = cl_uchar; + typedef std::true_type is_vector_type; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "uchar8"; } + static const char *add_typedef() { return "typedef uchar8 Type;\n"; } + using scalar_type = cl_uchar; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "uchar16"; } + static const char *add_typedef() { return "typedef uchar16 Type;\n"; } + using scalar_type = cl_uchar; + typedef std::true_type is_vector_type; +}; +// cl_long +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "long"; } + static const char *add_typedef() { return "typedef long Type;\n"; } + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "long2"; } + static const char *add_typedef() { return "typedef long2 Type;\n"; } + using scalar_type = cl_long; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "long3"; } + static const char *add_typedef() { return "typedef long3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_long; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + 
} +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "long4"; } + static const char *add_typedef() { return "typedef long4 Type;\n"; } + using scalar_type = cl_long; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "long8"; } + static const char *add_typedef() { return "typedef long8 Type;\n"; } + using scalar_type = cl_long; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "long16"; } + static const char *add_typedef() { return "typedef long16 Type;\n"; } + using scalar_type = cl_long; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +// cl_ulong +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ulong"; } + static const char *add_typedef() { return "typedef ulong Type;\n"; } + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ulong2"; } + static const char *add_typedef() { return "typedef ulong2 Type;\n"; } + using scalar_type = cl_ulong; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "ulong3"; } + static const char *add_typedef() { return "typedef ulong3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_ulong; + static const bool 
type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ulong4"; } + static const char *add_typedef() { return "typedef ulong4 Type;\n"; } + using scalar_type = cl_ulong; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ulong8"; } + static const char *add_typedef() { return "typedef ulong8 Type;\n"; } + using scalar_type = cl_ulong; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "ulong16"; } + static const char *add_typedef() { return "typedef ulong16 Type;\n"; } + using scalar_type = cl_ulong; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return int64_ok(device); + } }; - -template <> struct TypeIdentity +// cl_float +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "float"; } + static const char *add_typedef() { return "typedef float Type;\n"; } + static cl_float identify_limits(ArithmeticOp operation) + { + switch (operation) + { + case ArithmeticOp::add_: return 0.0f; + case ArithmeticOp::max_: + return -std::numeric_limits::infinity(); + case ArithmeticOp::min_: + return std::numeric_limits::infinity(); + case ArithmeticOp::mul_: return (cl_float)1; + default: log_error("Unknown operation request"); break; + } + return 0; + } +}; +template <> struct TypeManager : public CommonTypeManager { - static float val() { return 0.F; } + static const char *name() { return "float2"; } + static const char *add_typedef() { return "typedef float2 Type;\n"; } + using scalar_type = 
cl_float; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +template <> +struct TypeManager + : public CommonTypeManager { - static float val() { return -std::numeric_limits::infinity(); } + static const char *name() { return "float3"; } + static const char *add_typedef() { return "typedef float3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_float; }; -template <> struct TypeIdentity +template <> struct TypeManager : public CommonTypeManager { - static float val() { return std::numeric_limits::infinity(); } + static const char *name() { return "float4"; } + static const char *add_typedef() { return "typedef float4 Type;\n"; } + using scalar_type = cl_float; + typedef std::true_type is_vector_type; +}; +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "float8"; } + static const char *add_typedef() { return "typedef float8 Type;\n"; } + using scalar_type = cl_float; + typedef std::true_type is_vector_type; +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "float16"; } + static const char *add_typedef() { return "typedef float16 Type;\n"; } + using scalar_type = cl_float; + typedef std::true_type is_vector_type; }; -template <> struct TypeIdentity +// cl_double +template <> struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "double"; } + static const char *add_typedef() { return "typedef double Type;\n"; } + static cl_double identify_limits(ArithmeticOp operation) + { + switch (operation) + { + case ArithmeticOp::add_: return 0.0; + case ArithmeticOp::max_: + return -std::numeric_limits::infinity(); + case ArithmeticOp::min_: + return std::numeric_limits::infinity(); + case ArithmeticOp::mul_: return (cl_double)1; + default: log_error("Unknown operation request"); break; + } + return 0; + } + static const bool type_supported(cl_device_id device) + { + return 
double_ok(device); + } +}; +template <> +struct TypeManager : public CommonTypeManager { - static double val() { return 0.L; } + static const char *name() { return "double2"; } + static const char *add_typedef() { return "typedef double2 Type;\n"; } + using scalar_type = cl_double; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return double_ok(device); + } +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "double3"; } + static const char *add_typedef() { return "typedef double3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = cl_double; + static const bool type_supported(cl_device_id device) + { + return double_ok(device); + } +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "double4"; } + static const char *add_typedef() { return "typedef double4 Type;\n"; } + using scalar_type = cl_double; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return double_ok(device); + } +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "double8"; } + static const char *add_typedef() { return "typedef double8 Type;\n"; } + using scalar_type = cl_double; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return double_ok(device); + } +}; +template <> +struct TypeManager : public CommonTypeManager +{ + static const char *name() { return "double16"; } + static const char *add_typedef() { return "typedef double16 Type;\n"; } + using scalar_type = cl_double; + typedef std::true_type is_vector_type; + static const bool type_supported(cl_device_id device) + { + return double_ok(device); + } }; -template <> struct TypeIdentity +// cl_half +template <> +struct TypeManager + : public CommonTypeManager { - static double val() { return 
-std::numeric_limits::infinity(); } + static const char *name() { return "half"; } + static const char *add_typedef() { return "typedef half Type;\n"; } + typedef std::true_type is_sb_scalar_type; + static subgroups::cl_half identify_limits(ArithmeticOp operation) + { + switch (operation) + { + case ArithmeticOp::add_: return { 0x0000 }; + case ArithmeticOp::max_: return { 0xfc00 }; + case ArithmeticOp::min_: return { 0x7c00 }; + case ArithmeticOp::mul_: return { 0x3c00 }; + default: log_error("Unknown operation request"); break; + } + return { 0 }; + } + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } }; -template <> struct TypeIdentity +template <> +struct TypeManager + : public CommonTypeManager { - static double val() { return std::numeric_limits::infinity(); } + static const char *name() { return "half2"; } + static const char *add_typedef() { return "typedef half2 Type;\n"; } + using scalar_type = subgroups::cl_half; + typedef std::true_type is_sb_vector_type; + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } }; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "half3"; } + static const char *add_typedef() { return "typedef half3 Type;\n"; } + typedef std::true_type is_sb_vector_size3; + using scalar_type = subgroups::cl_half; -template struct TypeCheck; -template <> struct TypeCheck + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } +}; +template <> +struct TypeManager + : public CommonTypeManager +{ + static const char *name() { return "half4"; } + static const char *add_typedef() { return "typedef half4 Type;\n"; } + using scalar_type = subgroups::cl_half; + typedef std::true_type is_sb_vector_type; + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } +}; +template <> +struct TypeManager + : public CommonTypeManager { - static bool val(cl_device_id) { 
return true; } + static const char *name() { return "half8"; } + static const char *add_typedef() { return "typedef half8 Type;\n"; } + using scalar_type = subgroups::cl_half; + typedef std::true_type is_sb_vector_type; + + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } }; -template <> struct TypeCheck +template <> +struct TypeManager + : public CommonTypeManager { - static bool val(cl_device_id) { return true; } + static const char *name() { return "half16"; } + static const char *add_typedef() { return "typedef half16 Type;\n"; } + using scalar_type = subgroups::cl_half; + typedef std::true_type is_sb_vector_type; + static const bool type_supported(cl_device_id device) + { + return half_ok(device); + } }; -static bool int64_ok(cl_device_id device) +// set scalar value to vector of halfs +template +typename std::enable_if::is_sb_vector_type::value>::type +set_value(Ty &lhs, const cl_ulong &rhs) { - char profile[128]; - int error; + const int size = sizeof(Ty) / sizeof(typename TypeManager::scalar_type); + for (auto i = 0; i < size; ++i) + { + lhs.data.s[i] = rhs; + } +} - error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), - (void *)&profile, NULL); - if (error) + +// set scalar value to vector +template +typename std::enable_if::is_vector_type::value>::type +set_value(Ty &lhs, const cl_ulong &rhs) +{ + const int size = sizeof(Ty) / sizeof(typename TypeManager::scalar_type); + for (auto i = 0; i < size; ++i) { - log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); - return false; + lhs.s[i] = rhs; } +} - if (strcmp(profile, "EMBEDDED_PROFILE") == 0) - return is_extension_available(device, "cles_khr_int64"); +// set vector to vector value +template +typename std::enable_if::is_vector_type::value>::type +set_value(Ty &lhs, const Ty &rhs) +{ + lhs = rhs; +} - return true; +// set scalar value to vector size 3 +template +typename std::enable_if::is_sb_vector_size3::value>::type +set_value(Ty &lhs, const 
cl_ulong &rhs) +{ + for (auto i = 0; i < 3; ++i) + { + lhs.data.s[i] = rhs; + } } -template <> struct TypeCheck +// set scalar value to scalar +template +typename std::enable_if::value>::type +set_value(Ty &lhs, const cl_ulong &rhs) { - static bool val(cl_device_id device) { return int64_ok(device); } -}; -template <> struct TypeCheck + lhs = static_cast(rhs); +} + +// set scalar value to half scalar +template +typename std::enable_if::is_sb_scalar_type::value>::type +set_value(Ty &lhs, const cl_ulong &rhs) { - static bool val(cl_device_id device) { return int64_ok(device); } -}; -template <> struct TypeCheck + lhs.data = cl_half_from_float(static_cast(rhs), g_rounding_mode); +} + +// compare for common vectors +template +typename std::enable_if::is_vector_type::value, bool>::type +compare(const Ty &lhs, const Ty &rhs) { - static bool val(cl_device_id) { return true; } -}; -template <> struct TypeCheck + const int size = sizeof(Ty) / sizeof(typename TypeManager::scalar_type); + for (auto i = 0; i < size; ++i) + { + if (lhs.s[i] != rhs.s[i]) + { + return false; + } + } + return true; +} + +// compare for vectors 3 +template +typename std::enable_if::is_sb_vector_size3::value, bool>::type +compare(const Ty &lhs, const Ty &rhs) { - static bool val(cl_device_id device) + for (auto i = 0; i < 3; ++i) { - return is_extension_available(device, "cl_khr_fp16"); + if (lhs.data.s[i] != rhs.data.s[i]) + { + return false; + } } -}; -template <> struct TypeCheck + return true; +} + +// compare for half vectors +template +typename std::enable_if::is_sb_vector_type::value, bool>::type +compare(const Ty &lhs, const Ty &rhs) { - static bool val(cl_device_id device) + const int size = sizeof(Ty) / sizeof(typename TypeManager::scalar_type); + for (auto i = 0; i < size; ++i) { - int error; - cl_device_fp_config c; - error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), - (void *)&c, NULL); - if (error) + if (lhs.data.s[i] != rhs.data.s[i]) { - log_info( - 
"clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); return false; } - return c != 0; } -}; + return true; +} +// compare for scalars +template +typename std::enable_if::value, bool>::type +compare(const Ty &lhs, const Ty &rhs) +{ + return lhs == rhs; +} + +// compare for scalar halfs +template +typename std::enable_if::is_sb_scalar_type::value, bool>::type +compare(const Ty &lhs, const Ty &rhs) +{ + return lhs.data == rhs.data; +} + +template inline bool compare_ordered(const Ty &lhs, const Ty &rhs) +{ + return lhs == rhs; +} + +template <> +inline bool compare_ordered(const subgroups::cl_half &lhs, + const subgroups::cl_half &rhs) +{ + return cl_half_to_float(lhs.data) == cl_half_to_float(rhs.data); +} + +template +inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs) +{ + return cl_half_to_float(lhs.data) == rhs; +} // Run a test kernel to compute the result of a built-in on an input static int run_kernel(cl_context context, cl_command_queue queue, @@ -318,6 +1369,9 @@ static int run_kernel(cl_context context, cl_command_queue queue, NULL); test_error(error, "clEnqueueWriteBuffer failed"); + error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, + NULL); + test_error(error, "clEnqueueWriteBuffer failed"); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); @@ -337,61 +1391,82 @@ static int run_kernel(cl_context context, cl_command_queue queue, } // Driver for testing a single built in function -template -struct test +template struct test { - static int run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, int dynscl, bool useCoreSubgroups) + static test_status run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; - int error; + cl_int 
error; int subgroup_size, num_subgroups; size_t realSize; - size_t global; - size_t local; + size_t global = test_params.global_workgroup_size; + size_t local = test_params.local_workgroup_size; clProgramWrapper program; clKernelWrapper kernel; cl_platform_id platform; - cl_int sgmap[2 * GSIZE]; - Ty mapin[LSIZE]; - Ty mapout[LSIZE]; + std::vector sgmap; + sgmap.resize(4 * global); + std::vector mapin; + mapin.resize(local); + std::vector mapout; + mapout.resize(local); + std::stringstream kernel_sstr; + + Fns::log_test(test_params, ""); + kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS "; + kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n"; // Make sure a test of type Ty is supported by the device - if (!TypeCheck::val(device)) return 0; + if (!TypeManager::type_supported(device)) + { + log_info("Data type not supported : %s\n", TypeManager::name()); + return TEST_SKIPPED_ITSELF; + } + + if (strstr(TypeManager::name(), "double")) + { + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; + } + else if (strstr(TypeManager::name(), "half")) + { + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n"; + } error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); - test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); - std::stringstream kernel_sstr; - if (useCoreSubgroups) + test_error_fail(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); + if (test_params.use_core_subgroups) { kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; } kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); " "M[I].y = get_sub_group_id();\n"; - kernel_sstr << TypeDef::val(); + kernel_sstr << TypeManager::add_typedef(); kernel_sstr << src; const std::string &kernel_str = kernel_sstr.str(); const char *kernel_src = kernel_str.c_str(); - error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0"); - if (error != 0) 
return error; + error = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, kname); + if (error != CL_SUCCESS) return TEST_FAIL; // Determine some local dimensions to use for the test. - global = GSIZE; - error = get_max_common_work_group_size(context, kernel, GSIZE, &local); - test_error(error, "get_max_common_work_group_size failed"); + error = get_max_common_work_group_size( + context, kernel, test_params.global_workgroup_size, &local); + test_error_fail(error, "get_max_common_work_group_size failed"); // Limit it a bit so we have muliple work groups - // Ideally this will still be large enough to give us multiple subgroups - if (local > LSIZE) local = LSIZE; + // Ideally this will still be large enough to give us multiple + if (local > test_params.local_workgroup_size) + local = test_params.local_workgroup_size; + // Get the sub group info - subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); + subgroupsAPI subgroupsApiSet(platform, test_params.use_core_subgroups); clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) @@ -435,8 +1510,9 @@ struct test std::vector idata; std::vector odata; - size_t input_array_size = GSIZE; - size_t output_array_size = GSIZE; + size_t input_array_size = global; + size_t output_array_size = global; + int dynscl = test_params.dynsc; if (dynscl != 0) { @@ -448,29 +1524,154 @@ struct test idata.resize(input_array_size); odata.resize(output_array_size); + if (test_params.divergence_mask_arg != -1) + { + cl_uint4 mask_vector; + mask_vector.x = 0xffffffffU; + mask_vector.y = 0xffffffffU; + mask_vector.z = 0xffffffffU; + mask_vector.w = 0xffffffffU; + error = clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + test_error_fail(error, "Unable to set divergence mask argument"); + } + // Run the kernel once on zeroes to get the map - memset(&idata[0], 0, input_array_size * 
sizeof(Ty)); - error = run_kernel(context, queue, kernel, global, local, &idata[0], - input_array_size * sizeof(Ty), sgmap, - global * sizeof(cl_int) * 2, &odata[0], + memset(idata.data(), 0, input_array_size * sizeof(Ty)); + error = run_kernel(context, queue, kernel, global, local, idata.data(), + input_array_size * sizeof(Ty), sgmap.data(), + global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - if (error) return error; + test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel - Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, - (int)global / (int)local); + test_params.subgroup_size = subgroup_size; + Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); + + test_status combined_status; + + if (test_params.divergence_mask_arg != -1) + { + combined_status = TEST_SKIPPED_ITSELF; + + for (auto &mask : test_params.all_work_item_masks) + { + test_params.work_items_mask = mask; + cl_uint4 mask_vector = bs128_to_cl_uint4(mask); + clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + error = run_kernel(context, queue, kernel, global, local, + idata.data(), input_array_size * sizeof(Ty), + sgmap.data(), global * sizeof(cl_int4), + odata.data(), output_array_size * sizeof(Ty), + TSIZE * sizeof(Ty)); + test_error_fail(error, "Running kernel second time failed"); + + // Check the result + test_status status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + + if (status == TEST_FAIL + || (status == TEST_PASS && combined_status != TEST_FAIL)) + combined_status = status; - error = run_kernel(context, queue, kernel, global, local, &idata[0], - input_array_size * sizeof(Ty), sgmap, - global * sizeof(cl_int) * 2, &odata[0], + if (status == TEST_FAIL) break; + } + } + else + { + error = + run_kernel(context, queue, kernel, global, local, idata.data(), + input_array_size * 
sizeof(Ty), sgmap.data(), + global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - if (error) return error; + test_error_fail(error, "Running kernel second time failed"); + + // Check the result + combined_status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + } + // Detailed failure and skip messages should be logged by Fns::gen + // and Fns::chk. + if (combined_status == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } + else if (combined_status == TEST_FAIL) + { + test_fail("Data verification failed\n"); + } + return combined_status; + } +}; + +static void set_last_workgroup_params(int non_uniform_size, + int &number_of_subgroups, + int subgroup_size, int &workgroup_size, + int &last_subgroup_size) +{ + number_of_subgroups = 1 + non_uniform_size / subgroup_size; + last_subgroup_size = non_uniform_size % subgroup_size; + workgroup_size = non_uniform_size; +} + +template +static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset, + int current_sbs) +{ + int randomize_data = (int)(genrand_int32(gMTdata) % 3); + // Initialize data matrix indexed by local id and sub group id + switch (randomize_data) + { + case 0: + memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty)); + break; + case 1: { + memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty)); + int wi_id = (int)(genrand_int32(gMTdata) % (cl_uint)current_sbs); + set_value(workgroup[wg_offset + wi_id], 41); + } + break; + case 2: + memset(&workgroup[wg_offset], 0xff, current_sbs * sizeof(Ty)); + break; + } +} +struct RunTestForType +{ + RunTestForType(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + WorkGroupParams test_params) + : device_(device), context_(context), queue_(queue), + num_elements_(num_elements), test_params_(test_params) + {} + template + int run_impl(const std::string &function_name) + { + int error = TEST_PASS; + std::string source = + 
std::regex_replace(test_params_.get_kernel_source(function_name), + std::regex("\\%s"), function_name); + std::string kernel_name = "test_" + function_name; + error = + test::run(device_, context_, queue_, num_elements_, + kernel_name.c_str(), source.c_str(), test_params_); - // Check the result - return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, - subgroup_size, (int)local, (int)global / (int)local); + // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be + // reported as having been skipped even if some tests within it + // passed, as the status codes are erroneously ORed together: + return error == TEST_FAIL ? TEST_FAIL : TEST_PASS; } + +private: + cl_device_id device_; + cl_context context_; + cl_command_queue queue_; + int num_elements_; + WorkGroupParams test_params_; }; #endif diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index e6ce1d2ecd..d415eefbb0 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -59,10 +59,26 @@ static const char *gbar_source = // barrier test functions template struct BAR { - static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + if (Which == 0) + log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...%s\n", + extra_text); + else + log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...%s\n", + extra_text); + } + + static void gen(cl_int *x, cl_int *t, cl_int *m, + const WorkGroupParams &test_params) { int i, ii, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; + ng = ng / nw; int e; ii = 0; @@ -79,8 +95,7 @@ template struct BAR // Now map into work group using map from device for (j = 0; j < nw; ++j) { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; + x[j] = t[j]; } x += nw; 
@@ -88,26 +103,24 @@ template struct BAR } } - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - int ns, int nw, int ng) + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; + ng = ng / nw; cl_int tr, rr; - if (Which == 0) - log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n"); - else - log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n"); - for (k = 0; k < ng; ++k) { // Map to array indexed to array indexed by local ID and sub group for (j = 0; j < nw; ++j) { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; + mx[j] = x[j]; + my[j] = y[j]; } for (j = 0; j < nj; ++j) @@ -123,9 +136,10 @@ template struct BAR if (tr != rr) { log_error("ERROR: sub_group_barrier mismatch for local " - "id %d in sub group %d in group %d\n", - i, j, k); - return -1; + "id %d in sub group %d in group %d expected " + "%d got %d\n", + i, j, k, tr, rr); + return TEST_FAIL; } } } @@ -135,7 +149,7 @@ template struct BAR m += 2 * nw; } - return 0; + return TEST_PASS; } }; @@ -144,18 +158,18 @@ int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, bool useCoreSubgroups) { - int error; + int error = TEST_PASS; // Adjust these individually below if desired/needed -#define G 2000 -#define L 200 - - error = test, G, L>::run(device, context, queue, - num_elements, "test_lbar", - lbar_source, 0, useCoreSubgroups); - error = test, G, L, G>::run( - device, context, queue, num_elements, "test_gbar", gbar_source, 0, - useCoreSubgroups); + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.use_core_subgroups = useCoreSubgroups; + error = 
test>::run(device, context, queue, num_elements, + "test_lbar", lbar_source, test_params); + error |= test, global_work_size>::run( + device, context, queue, num_elements, "test_gbar", gbar_source, + test_params); return error; } @@ -179,4 +193,4 @@ int test_barrier_functions_ext(cl_device_id device, cl_context context, } return test_barrier_functions(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index 02850e5f7d..f2bd5b9257 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ -46,7 +46,7 @@ static const char *ifp_source = "#define INST_COUNT 0x3\n" "\n" "__kernel void\n" - "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" + "test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n" "{\n" " __local atomic_int loc[NUM_LOC];\n" "\n" @@ -225,10 +225,21 @@ void run_insts(cl_int *x, cl_int *p, int n) struct IFP { - static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" independent forward progress...%s\n", extra_text); + } + + static void gen(cl_int *x, cl_int *t, cl_int *, + const WorkGroupParams &test_params) { int k; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; + ng = ng / nw; // We need at least 2 sub groups per group for this test if (nj == 1) return; @@ -240,16 +251,18 @@ struct IFP } } - static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns, - int nw, int ng) + static test_status chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, + const WorkGroupParams &test_params) { int i, k; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = 
test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; + ng = ng / nw; - // We need at least 2 sub groups per group for this tes - if (nj == 1) return 0; - - log_info(" independent forward progress...\n"); + // We need at least 2 sub groups per group for this test + if (nj == 1) return TEST_SKIPPED_ITSELF; for (k = 0; k < ng; ++k) { @@ -261,28 +274,31 @@ struct IFP log_error( "ERROR: mismatch at element %d in work group %d\n", i, k); - return -1; + return TEST_FAIL; } } x += nj * (NUM_LOC + 1); y += NUM_LOC; } - return 0; + return TEST_PASS; } }; int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, bool useCoreSubgroups) { - int error; + int error = TEST_PASS; + // Global/local work group sizes // Adjust these individually below if desired/needed -#define G 2000 -#define L 200 - error = test::run(device, context, queue, num_elements, - "test_ifp", ifp_source, NUM_LOC + 1, - useCoreSubgroups); + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.use_core_subgroups = useCoreSubgroups; + test_params.dynsc = NUM_LOC + 1; + error = test::run(device, context, queue, num_elements, + "test_ifp", ifp_source, test_params); return error; } @@ -348,17 +364,21 @@ int test_ifp_ext(cl_device_id device, cl_context context, } // ifp only in subgroup functions tests: test_status error; - error = checkIFPSupport(device, ifpSupport); - if (error != TEST_PASS) - { - return error; - } - if (ifpSupport == false) + auto device_cl_version = get_device_cl_version(device); + if (device_cl_version >= Version(2, 1)) { - log_info( - "Error reason: the extension cl_khr_subgroups requires that " - "Independed forward progress has to be supported by device.\n"); - return TEST_FAIL; + error = checkIFPSupport(device, ifpSupport); + if (error != TEST_PASS) + { + return error; + } + if (ifpSupport == false) + { + log_info( + "Error reason: 
the extension cl_khr_subgroups requires that " + "Independed forward progress has to be supported by device.\n"); + return TEST_FAIL; + } } return test_ifp(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp index 2ad3d7fad7..761ca7a6a1 100644 --- a/test_conformance/subgroups/test_queries.cpp +++ b/test_conformance/subgroups/test_queries.cpp @@ -67,9 +67,8 @@ int test_sub_group_info(cl_device_id device, cl_context context, const std::string &kernel_str = kernel_sstr.str(); const char *kernel_src = kernel_str.c_str(); - error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, "query_kernel", - "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "query_kernel"); if (error != 0) return error; // Determine some local dimensions to use for the test. diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp new file mode 100644 index 0000000000..aa9b32cbb5 --- /dev/null +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -0,0 +1,225 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + +namespace { +// Any/All test functions +template struct AA +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s...%s\n", operation_names(operation), + extra_text); + } + + static void gen(cl_int *x, cl_int *t, cl_int *m, + const WorkGroupParams &test_params) + { + int i, ii, j, k, n; + int ng = test_params.global_workgroup_size; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int nj = (nw + ns - 1) / ns; + int e; + ng = ng / nw; + ii = 0; + for (k = 0; k < ng; ++k) + { + for (j = 0; j < nj; ++j) + { + ii = j * ns; + n = ii + ns > nw ? nw - ii : ns; + e = (int)(genrand_int32(gMTdata) % 3); + + // Initialize data matrix indexed by local id and sub group id + switch (e) + { + case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break; + case 1: + memset(&t[ii], 0, n * sizeof(cl_int)); + i = (int)(genrand_int32(gMTdata) % (cl_uint)n); + t[ii + i] = 41; + break; + case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break; + } + } + + // Now map into work group using map from device + for (j = 0; j < nw; ++j) + { + x[j] = t[j]; + } + + x += nw; + m += 4 * nw; + } + } + + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) + { + int ii, i, j, k, n; + int ng = test_params.global_workgroup_size; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int nj = (nw + ns - 1) / ns; + cl_int taa, raa; + ng = ng / nw; + + for (k = 0; k < ng; ++k) + { + // Map to array indexed to array indexed by local ID and sub group + for (j = 0; j < nw; ++j) + { + mx[j] = x[j]; + my[j] = y[j]; + } + + for (j = 0; j < nj; ++j) + { + ii = j * ns; + n = ii + ns > nw ? 
nw - ii : ns; + + // Compute target + if (operation == NonUniformVoteOp::any) + { + taa = 0; + for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0; + } + + if (operation == NonUniformVoteOp::all) + { + taa = 1; + for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0; + } + + // Check result + for (i = 0; i < n; ++i) + { + raa = my[ii + i] != 0; + if (raa != taa) + { + log_error("ERROR: sub_group_%s mismatch for local id " + "%d in sub group %d in group %d\n", + operation_names(operation), i, j, k); + return TEST_FAIL; + } + } + } + + x += nw; + y += nw; + m += 4 * nw; + } + return TEST_PASS; + } +}; + +static const char *any_source = "__kernel void test_any(const __global Type " + "*in, __global int4 *xy, __global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_any(in[gid]);\n" + "}\n"; + +static const char *all_source = "__kernel void test_all(const __global Type " + "*in, __global int4 *xy, __global Type *out)\n" + "{\n" + " int gid = get_global_id(0);\n" + " XY(xy,gid);\n" + " out[gid] = sub_group_all(in[gid]);\n" + "}\n"; + + +template +int run_broadcast_scan_reduction_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_broadcast"); + error |= + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); + return error; +} + +} +// Entry point from main +int test_subgroup_functions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + bool useCoreSubgroups) +{ + constexpr size_t global_work_size = 2000; + constexpr size_t 
local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + + RunTestForType rft(device, context, queue, num_elements, test_params); + int error = + rft.run_impl>("sub_group_any"); + error |= rft.run_impl>("sub_group_all"); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + error |= run_broadcast_scan_reduction_for_type(rft); + return error; +} + +int test_subgroup_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_subgroup_functions(device, context, queue, num_elements, true); +} + +int test_subgroup_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + return test_subgroup_functions(device, context, queue, num_elements, false); +} diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp new file mode 100644 index 0000000000..4148707eba --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -0,0 +1,1074 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_templates.h" +#include "harness/typeWrappers.h" +#include + +namespace { +// Test for ballot functions +template struct BALLOT +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot...%s\n", extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int non_uniform_size = gws % lws; + int wg_number = gws / lws; + wg_number = non_uniform_size ? wg_number + 1 : wg_number; + int last_subgroup_size = 0; + + for (int wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + for (int sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? 
lws - wg_offset : sbs; + } + + for (int wi_id = 0; wi_id < current_sbs; wi_id++) + { + cl_uint v; + if (genrand_bool(gMTdata)) + { + v = genrand_bool(gMTdata); + } + else if (genrand_bool(gMTdata)) + { + v = 1U << ((genrand_int32(gMTdata) % 31) + 1); + } + else + { + v = genrand_int32(gMTdata); + } + cl_uint4 v4 = { v, 0, 0, 0 }; + t[wi_id + wg_offset] = v4; + } + } + // Now map into work group using map from device + for (int wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + x += lws; + m += 4 * lws; + } + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int non_uniform_size = gws % lws; + int wg_number = gws / lws; + wg_number = non_uniform_size ? wg_number + 1 : wg_number; + int last_subgroup_size = 0; + + for (int wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + for (int wi_id = 0; wi_id < lws; ++wi_id) + { // inside the work_group + mx[wi_id] = x[wi_id]; // read host inputs for work_group + my[wi_id] = y[wi_id]; // read device outputs for work_group + } + + for (int sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? 
lws - wg_offset : sbs; + } + + bs128 expected_result_bs = 0; + + std::set active_work_items; + for (int wi_id = 0; wi_id < current_sbs; ++wi_id) + { + if (test_params.work_items_mask.test(wi_id)) + { + bool predicate = (mx[wg_offset + wi_id].s0 != 0); + expected_result_bs |= (bs128(predicate) << wi_id); + active_work_items.insert(wi_id); + } + } + if (active_work_items.empty()) + { + continue; + } + + cl_uint4 expected_result = + bs128_to_cl_uint4(expected_result_bs); + for (const int &active_work_item : active_work_items) + { + int wi_id = active_work_item; + + cl_uint4 device_result = my[wg_offset + wi_id]; + bs128 device_result_bs = cl_uint4_to_bs128(device_result); + + if (device_result_bs != expected_result_bs) + { + log_error( + "ERROR: sub_group_ballot mismatch for local id " + "%d in sub group %d in group %d obtained {%d, %d, " + "%d, %d}, expected {%d, %d, %d, %d}\n", + wi_id, sb_id, wg_id, device_result.s0, + device_result.s1, device_result.s2, + device_result.s3, expected_result.s0, + expected_result.s1, expected_result.s2, + expected_result.s3); + return TEST_FAIL; + } + } + } + + x += lws; + y += lws; + m += 4 * lws; + } + + return TEST_PASS; + } +}; + +// Test for bit extract ballot functions +template struct BALLOT_BIT_EXTRACT +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int wi_id, sb_id, wg_id, l; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int wg_number = gws / lws; + int limit_sbs = sbs > 100 ? 
100 : sbs; + int non_uniform_size = gws % lws; + + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + // rand index to bit extract + int index_for_odd = (int)(genrand_int32(gMTdata) & 0x7fffffff) + % (limit_sbs > current_sbs ? current_sbs : limit_sbs); + int index_for_even = (int)(genrand_int32(gMTdata) & 0x7fffffff) + % (limit_sbs > current_sbs ? current_sbs : limit_sbs); + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { + // index of the third element int the vector. + int midx = 4 * wg_offset + 4 * wi_id + 2; + // storing information about index to bit extract + m[midx] = (cl_int)index_for_odd; + m[++midx] = (cl_int)index_for_even; + } + set_randomdata_for_subgroup(t, wg_offset, current_sbs); + } + + // Now map into work group using map from device + for (wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + + x += lws; + m += 4 * lws; + } + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int wi_id, wg_id, l, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int wg_number = gws / lws; + cl_uint4 expected_result, device_result; + int last_subgroup_size = 0; + int current_sbs = 0; + int non_uniform_size = gws % lws; + + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + // Map to array indexed to array indexed by local ID and sub group + for (wi_id = 0; wi_id < lws; ++wi_id) + { // inside the work_group + // read host inputs for work_group + mx[wi_id] = x[wi_id]; + // read device outputs for work_group 
+ my[wi_id] = y[wi_id]; + } + + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + // take index of array where info which work_item will + // be broadcast its value is stored + int midx = 4 * wg_offset + 2; + // take subgroup local id of this work_item + int index_for_odd = (int)m[midx]; + int index_for_even = (int)m[++midx]; + + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { // for each subgroup + int bit_value = 0; + // from which value of bitfield bit + // verification will be done + int take_shift = + (wi_id & 1) ? index_for_odd % 32 : index_for_even % 32; + int bit_mask = 1 << take_shift; + + if (wi_id < 32) + (mx[wg_offset + wi_id].s0 & bit_mask) > 0 + ? bit_value = 1 + : bit_value = 0; + if (wi_id >= 32 && wi_id < 64) + (mx[wg_offset + wi_id].s1 & bit_mask) > 0 + ? bit_value = 1 + : bit_value = 0; + if (wi_id >= 64 && wi_id < 96) + (mx[wg_offset + wi_id].s2 & bit_mask) > 0 + ? bit_value = 1 + : bit_value = 0; + if (wi_id >= 96 && wi_id < 128) + (mx[wg_offset + wi_id].s3 & bit_mask) > 0 + ? bit_value = 1 + : bit_value = 0; + + if (wi_id & 1) + { + bit_value ? expected_result = { 1, 0, 0, 1 } + : expected_result = { 0, 0, 0, 1 }; + } + else + { + bit_value ? 
expected_result = { 1, 0, 0, 2 } + : expected_result = { 0, 0, 0, 2 }; + } + + device_result = my[wg_offset + wi_id]; + if (!compare(device_result, expected_result)) + { + log_error( + "ERROR: sub_group_%s mismatch for local id %d in " + "sub group %d in group %d obtained {%d, %d, %d, " + "%d}, expected {%d, %d, %d, %d}\n", + operation_names(operation), wi_id, sb_id, wg_id, + device_result.s0, device_result.s1, + device_result.s2, device_result.s3, + expected_result.s0, expected_result.s1, + expected_result.s2, expected_result.s3); + return TEST_FAIL; + } + } + } + x += lws; + y += lws; + m += 4 * lws; + } + return TEST_PASS; + } +}; + +template struct BALLOT_INVERSE +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_inverse_ballot...%s\n", extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int non_uniform_size = gws % lws; + // no work here + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int wi_id, wg_id, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + cl_uint4 expected_result, device_result; + int non_uniform_size = gws % lws; + int wg_number = gws / lws; + int last_subgroup_size = 0; + int current_sbs = 0; + if (non_uniform_size) wg_number++; + + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + // Map to array indexed to array indexed by local ID and sub group + for (wi_id = 0; wi_id < lws; ++wi_id) + { // inside the work_group + mx[wi_id] = x[wi_id]; 
// read host inputs for work_group + my[wi_id] = y[wi_id]; // read device outputs for work_group + } + + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + // take index of array where info which work_item will + // be broadcast its value is stored + int midx = 4 * wg_offset + 2; + // take subgroup local id of this work_item + // Check result + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { // for each subgroup work item + + wi_id & 1 ? expected_result = { 1, 0, 0, 1 } + : expected_result = { 1, 0, 0, 2 }; + + device_result = my[wg_offset + wi_id]; + if (!compare(device_result, expected_result)) + { + log_error( + "ERROR: sub_group_%s mismatch for local id %d in " + "sub group %d in group %d obtained {%d, %d, %d, " + "%d}, expected {%d, %d, %d, %d}\n", + operation_names(operation), wi_id, sb_id, wg_id, + device_result.s0, device_result.s1, + device_result.s2, device_result.s3, + expected_result.s0, expected_result.s1, + expected_result.s2, expected_result.s3); + return TEST_FAIL; + } + } + } + x += lws; + y += lws; + m += 4 * lws; + } + + return TEST_PASS; + } +}; + + +// Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function +template struct BALLOT_COUNT_SCAN_FIND +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int wi_id, wg_id, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int non_uniform_size = gws % lws; + int wg_number = gws / lws; + int 
last_subgroup_size = 0; + int current_sbs = 0; + + if (non_uniform_size) + { + wg_number++; + } + int e; + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + if (operation == BallotOp::ballot_bit_count + || operation == BallotOp::ballot_inclusive_scan + || operation == BallotOp::ballot_exclusive_scan) + { + set_randomdata_for_subgroup(t, wg_offset, current_sbs); + } + else if (operation == BallotOp::ballot_find_lsb + || operation == BallotOp::ballot_find_msb) + { + // Regarding to the spec, find lsb and find msb result is + // undefined behavior if input value is zero, so generate + // only non-zero values. + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { + char x = (genrand_int32(gMTdata)) & 0xff; + // undefined behaviour in case of 0; + x = x ? 
x : 1; + memset(&t[wg_offset + wi_id], x, sizeof(Ty)); + } + } + else + { + log_error("Unknown operation..."); + } + } + + // Now map into work group using map from device + for (wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + + x += lws; + m += 4 * lws; + } + } + + static bs128 getImportantBits(cl_uint sub_group_local_id, + cl_uint sub_group_size) + { + bs128 mask; + if (operation == BallotOp::ballot_bit_count + || operation == BallotOp::ballot_find_lsb + || operation == BallotOp::ballot_find_msb) + { + for (cl_uint i = 0; i < sub_group_size; ++i) mask.set(i); + } + else if (operation == BallotOp::ballot_inclusive_scan + || operation == BallotOp::ballot_exclusive_scan) + { + for (cl_uint i = 0; i < sub_group_local_id; ++i) mask.set(i); + if (operation == BallotOp::ballot_inclusive_scan) + mask.set(sub_group_local_id); + } + return mask; + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int wi_id, wg_id, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int non_uniform_size = gws % lws; + int wg_number = gws / lws; + wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; + cl_uint expected_result, device_result; + int last_subgroup_size = 0; + int current_sbs = 0; + + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + // Map to array indexed to array indexed by local ID and sub group + for (wi_id = 0; wi_id < lws; ++wi_id) + { // inside the work_group + // read host inputs for work_group + mx[wi_id] = x[wi_id]; + // read device outputs for work_group + my[wi_id] = y[wi_id]; + } + + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + // Check result + expected_result = 0; + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { // for subgroup element + bs128 bs; + // convert cl_uint4 input into std::bitset<128> + bs |= bs128(mx[wg_offset + wi_id].s0) + | (bs128(mx[wg_offset + wi_id].s1) << 32) + | (bs128(mx[wg_offset + wi_id].s2) << 64) + | (bs128(mx[wg_offset + wi_id].s3) << 96); + bs &= getImportantBits(wi_id, sbs); + device_result = my[wg_offset + wi_id].s0; + if (operation == BallotOp::ballot_inclusive_scan + || operation == BallotOp::ballot_exclusive_scan + || operation == BallotOp::ballot_bit_count) + { + expected_result = bs.count(); + if (!compare(device_result, expected_result)) + { + log_error("ERROR: sub_group_%s " + "mismatch for local id %d in sub group " + "%d in group %d obtained %d, " + "expected %d\n", + operation_names(operation), wi_id, sb_id, + wg_id, device_result, expected_result); + return TEST_FAIL; + } + } + else if (operation == BallotOp::ballot_find_lsb) + { + for (int id = 0; id < sbs; ++id) + { + if (bs.test(id)) + { + expected_result = id; + break; + } + } + if (!compare(device_result, 
expected_result)) + { + log_error("ERROR: sub_group_ballot_find_lsb " + "mismatch for local id %d in sub group " + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); + return TEST_FAIL; + } + } + else if (operation == BallotOp::ballot_find_msb) + { + for (int id = sbs - 1; id >= 0; --id) + { + if (bs.test(id)) + { + expected_result = id; + break; + } + } + if (!compare(device_result, expected_result)) + { + log_error("ERROR: sub_group_ballot_find_msb " + "mismatch for local id %d in sub group " + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); + return TEST_FAIL; + } + } + } + } + x += lws; + y += lws; + m += 4 * lws; + } + return TEST_PASS; + } +}; + +// test mask functions +template struct SMASK +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" get_sub_group_%s_mask...%s\n", operation_names(operation), + extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int wi_id, wg_id, l, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + int wg_number = gws / lws; + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs = wg_offset + sbs > lws ? 
lws - wg_offset : sbs; + // Produce expected masks for each work item in the subgroup + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { + int midx = 4 * wg_offset + 4 * wi_id; + cl_uint max_sub_group_size = m[midx + 2]; + cl_uint4 expected_mask = { 0 }; + expected_mask = generate_bit_mask( + wi_id, operation_names(operation), max_sub_group_size); + set_value(t[wg_offset + wi_id], expected_mask); + } + } + + // Now map into work group using map from device + for (wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + x += lws; + m += 4 * lws; + } + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int wi_id, wg_id, sb_id; + int gws = test_params.global_workgroup_size; + int lws = test_params.local_workgroup_size; + int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; + Ty expected_result, device_result; + int wg_number = gws / lws; + + for (wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + for (wi_id = 0; wi_id < lws; ++wi_id) + { // inside the work_group + mx[wi_id] = x[wi_id]; // read host inputs for work_group + my[wi_id] = y[wi_id]; // read device outputs for work_group + } + + for (sb_id = 0; sb_id < sb_number; ++sb_id) + { + int wg_offset = sb_id * sbs; + int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + + // Check result + for (wi_id = 0; wi_id < current_sbs; ++wi_id) + { // inside the subgroup + expected_result = + mx[wg_offset + wi_id]; // read host input for subgroup + device_result = + my[wg_offset + + wi_id]; // read device outputs for subgroup + if (!compare(device_result, expected_result)) + { + log_error("ERROR: get_sub_group_%s_mask... 
mismatch " + "for local id %d in sub group %d in group " + "%d, obtained %d, expected %d\n", + operation_names(operation), wi_id, sb_id, + wg_id, device_result, expected_result); + return TEST_FAIL; + } + } + } + x += lws; + y += lws; + m += 4 * lws; + } + return TEST_PASS; + } +}; + +std::string sub_group_non_uniform_broadcast_source = R"( +__kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); + } else { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); + } +} +)"; +std::string sub_group_broadcast_first_source = R"( +__kernel void test_sub_group_broadcast_first(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_broadcast_first(x);; + } else { + out[gid] = sub_group_broadcast_first(x);; + } +} +)"; +std::string sub_group_ballot_bit_scan_find_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(0,0,0,0); + value = (uint4)(%s(x),0,0,0); + out[gid] = value; +} +)"; +std::string sub_group_ballot_mask_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].z = get_max_sub_group_size(); + Type x = in[gid]; + uint4 mask = %s(); + out[gid] = mask; +} +)"; +std::string sub_group_ballot_source = R"( +__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { + uint gid = get_global_id(0); + XY(xy,gid); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << 
(subgroup_local_id % 32); + uint work_item_mask; + if (subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; + } + uint4 value = (uint4)(0, 0, 0, 0); + if (elect_work_item & work_item_mask) { + value = sub_group_ballot(in[gid].s0); + } + out[gid] = value; +} +)"; +std::string sub_group_inverse_ballot_source = R"( +__kernel void test_sub_group_inverse_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + uint4 partial_ballot_mask = (uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + uint4 partial_ballot_mask = (uint4)(0x55555555,0x55555555,0x55555555,0x55555555); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; +std::string sub_group_ballot_bit_extract_source = R"( + __kernel void test_sub_group_ballot_bit_extract(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint index = xy[gid].z; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + if (sub_group_ballot_bit_extract(x, xy[gid].z)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + if (sub_group_ballot_bit_extract(x, xy[gid].w)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; + +template int run_non_uniform_broadcast_for_type(RunTestForType rft) +{ + int error = 
+ rft.run_impl>( + "sub_group_non_uniform_broadcast"); + return error; +} + + +} + +int test_subgroup_functions_ballot(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_ballot")) + { + log_info("cl_khr_subgroup_ballot is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 170; + constexpr size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_ballot_mask_source); + test_params.save_kernel_source(sub_group_non_uniform_broadcast_source, + "sub_group_non_uniform_broadcast"); + test_params.save_kernel_source(sub_group_broadcast_first_source, + "sub_group_broadcast_first"); + RunTestForType rft(device, context, queue, num_elements, test_params); + + // non uniform broadcast functions + int error = run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error 
|= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= 
run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); + + // broadcast first functions + error |= + rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl>( + "sub_group_broadcast_first"); + error |= rft.run_impl< + subgroups::cl_half, + BC>( + "sub_group_broadcast_first"); + + // mask functions + error |= rft.run_impl>( + "get_sub_group_eq_mask"); + error |= rft.run_impl>( + "get_sub_group_ge_mask"); + error |= rft.run_impl>( + "get_sub_group_gt_mask"); + error |= rft.run_impl>( + "get_sub_group_le_mask"); + error |= rft.run_impl>( + "get_sub_group_lt_mask"); + + // sub_group_ballot function + WorkGroupParams test_params_ballot(global_work_size, local_work_size, 3); + test_params_ballot.save_kernel_source(sub_group_ballot_source); + RunTestForType rft_ballot(device, context, queue, num_elements, + test_params_ballot); + error |= + rft_ballot.run_impl>("sub_group_ballot"); + + // ballot arithmetic functions + WorkGroupParams test_params_arith(global_work_size, local_work_size); + test_params_arith.save_kernel_source(sub_group_ballot_bit_scan_find_source); + test_params_arith.save_kernel_source(sub_group_inverse_ballot_source, + 
"sub_group_inverse_ballot"); + test_params_arith.save_kernel_source(sub_group_ballot_bit_extract_source, + "sub_group_ballot_bit_extract"); + RunTestForType rft_arith(device, context, queue, num_elements, + test_params_arith); + error |= + rft_arith.run_impl>( + "sub_group_inverse_ballot"); + error |= rft_arith.run_impl< + cl_uint4, BALLOT_BIT_EXTRACT>( + "sub_group_ballot_bit_extract"); + error |= rft_arith.run_impl< + cl_uint4, BALLOT_COUNT_SCAN_FIND>( + "sub_group_ballot_bit_count"); + error |= rft_arith.run_impl< + cl_uint4, + BALLOT_COUNT_SCAN_FIND>( + "sub_group_ballot_inclusive_scan"); + error |= rft_arith.run_impl< + cl_uint4, + BALLOT_COUNT_SCAN_FIND>( + "sub_group_ballot_exclusive_scan"); + error |= rft_arith.run_impl< + cl_uint4, BALLOT_COUNT_SCAN_FIND>( + "sub_group_ballot_find_lsb"); + error |= rft_arith.run_impl< + cl_uint4, BALLOT_COUNT_SCAN_FIND>( + "sub_group_ballot_find_msb"); + + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp new file mode 100644 index 0000000000..527be5ad5c --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -0,0 +1,214 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_templates.h" +#include "harness/typeWrappers.h" + +#define CLUSTER_SIZE 4 +#define CLUSTER_SIZE_STR "4" + +namespace { +std::string sub_group_clustered_reduce_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].w = 0; + if (sizeof(in[gid]) == sizeof(%s(in[gid], )" CLUSTER_SIZE_STR R"())) { + xy[gid].w = sizeof(in[gid]); + } + out[gid] = %s(in[gid], )" CLUSTER_SIZE_STR R"(); +} +)"; + +// DESCRIPTION: +// Test for reduce cluster functions +template struct RED_CLU +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...%s\n", + operation_names(operation), TypeManager::name(), + sizeof(Ty), extra_text); + } + + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) + { + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + ng = ng / nw; + generate_inputs(x, t, m, ns, nw, ng); + } + + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) + { + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + int nj = (nw + ns - 1) / ns; + ng = ng / nw; + + for (int k = 0; k < ng; ++k) + { + std::vector data_type_sizes; + // Map to array indexed to array indexed by local ID and sub group + for (int j = 0; j < nw; ++j) + { + mx[j] = x[j]; + my[j] = y[j]; + data_type_sizes.push_back(m[4 * j + 3]); + } + + for (cl_int dts : data_type_sizes) + { + if (dts != sizeof(Ty)) + { + log_error("ERROR: sub_group_clustered_reduce_%s(%s) " + "wrong data type size detected, expected: %d, " + "used by device %d, in group %d\n", + operation_names(operation), + TypeManager::name(), sizeof(Ty), dts, k); + return 
TEST_FAIL; + } + } + + for (int j = 0; j < nj; ++j) + { + int ii = j * ns; + int n = ii + ns > nw ? nw - ii : ns; + int midx = 4 * ii + 2; + std::vector clusters_results; + int clusters_counter = ns / CLUSTER_SIZE; + clusters_results.resize(clusters_counter); + + // Compute target + Ty tr = mx[ii]; + for (int i = 0; i < n; ++i) + { + if (i % CLUSTER_SIZE == 0) + tr = mx[ii + i]; + else + tr = calculate(tr, mx[ii + i], operation); + clusters_results[i / CLUSTER_SIZE] = tr; + } + + // Check result + for (int i = 0; i < n; ++i) + { + Ty rr = my[ii + i]; + tr = clusters_results[i / CLUSTER_SIZE]; + if (!compare(rr, tr)) + { + log_error( + "ERROR: sub_group_clustered_reduce_%s(%s) mismatch " + "for local id %d in sub group %d in group %d\n", + operation_names(operation), TypeManager::name(), + i, j, k); + return TEST_FAIL; + } + } + } + + x += nw; + y += nw; + m += 4 * nw; + } + return TEST_PASS; + } +}; + +template +int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_reduce_add"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_max"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_min"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_mul"); + return error; +} +template int run_cluster_and_or_xor_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_reduce_and"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_or"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_xor"); + return error; +} +template +int run_cluster_logical_and_or_xor_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_reduce_logical_and"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_logical_or"); + error |= rft.run_impl>( + "sub_group_clustered_reduce_logical_xor"); + + return error; +} +} + +int test_subgroup_functions_clustered_reduce(cl_device_id device, + cl_context context, + cl_command_queue queue, + int 
num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_clustered_reduce")) + { + log_info("cl_khr_subgroup_clustered_reduce is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_clustered_reduce_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + error |= run_cluster_red_add_max_min_mul_for_type(rft); + + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + error |= run_cluster_and_or_xor_for_type(rft); + + error |= run_cluster_logical_and_or_xor_for_type(rft); + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp new file mode 100644 index 0000000000..c9e6bb616f --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -0,0 +1,148 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/typeWrappers.h" + +namespace { + +template int run_broadcast_for_extended_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_broadcast"); + return error; +} + +template int run_scan_reduction_for_type(RunTestForType rft) +{ + int error = + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); + return error; +} + + +} + +int test_subgroup_functions_extended_types(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_extended_types")) + { + log_info("cl_khr_subgroup_extended_types is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, 
local_work_size); + test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + + RunTestForType rft(device, context, queue, num_elements, test_params); + int error = run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= 
run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); + + error |= run_scan_reduction_for_type(rft); + error |= run_scan_reduction_for_type(rft); + error |= run_scan_reduction_for_type(rft); + error |= run_scan_reduction_for_type(rft); + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp new file mode 100644 index 0000000000..02fc507b55 --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -0,0 +1,166 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "harness/typeWrappers.h" +#include "subgroup_common_templates.h" + +namespace { + +std::string sub_group_non_uniform_arithmetic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { + int gid = get_global_id(0); + XY(xy,gid); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); + } + } +)"; + +template +int run_functions_add_mul_max_min_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_mul"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_mul"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_min"); + error 
|= rft.run_impl>( + "sub_group_non_uniform_reduce_add"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_mul"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_max"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_min"); + return error; +} + +template int run_functions_and_or_xor_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_xor"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_xor"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_xor"); + return error; +} + +template +int run_functions_logical_and_or_xor_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_logical_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_logical_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_inclusive_logical_xor"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_logical_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_logical_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_scan_exclusive_logical_xor"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_logical_and"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_logical_or"); + error |= rft.run_impl>( + "sub_group_non_uniform_reduce_logical_xor"); + return error; +} + +} + +int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + if (!is_extension_available(device, + 
"cl_khr_subgroup_non_uniform_arithmetic")) + { + log_info("cl_khr_subgroup_non_uniform_arithmetic is not supported on " + "this device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size, 3); + test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + error |= run_functions_add_mul_max_min_for_type(rft); + + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + error |= run_functions_and_or_xor_for_type(rft); + + error |= run_functions_logical_and_or_xor_for_type(rft); + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp new file mode 100644 index 0000000000..3be1ba307a --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -0,0 +1,291 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "harness/typeWrappers.h" +#include + +namespace { + +template struct VOTE +{ + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s%s(%s)...%s\n", + (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", + operation_names(operation), TypeManager::name(), + extra_text); + } + + static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params) + { + int i, ii, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + int nj = (nw + ns - 1) / ns; + int non_uniform_size = ng % nw; + ng = ng / nw; + int last_subgroup_size = 0; + ii = 0; + + if (operation == NonUniformVoteOp::elect) return; + + for (k = 0; k < ng; ++k) + { // for each work_group + if (non_uniform_size && k == ng - 1) + { + set_last_workgroup_params(non_uniform_size, nj, ns, nw, + last_subgroup_size); + } + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + if (last_subgroup_size && j == nj - 1) + { + n = last_subgroup_size; + } + else + { + n = ii + ns > nw ? 
nw - ii : ns; + } + int e = genrand_int32(gMTdata) % 3; + + for (i = 0; i < n; i++) + { + if (e == 2) + { // set once 0 and once 1 alternately + int value = i % 2; + set_value(t[ii + i], value); + } + else + { // set 0/1 for all work items in subgroup + set_value(t[ii + i], e); + } + } + } + // Now map into work group using map from device + for (j = 0; j < nw; ++j) + { + x[j] = t[j]; + } + x += nw; + m += 4 * nw; + } + } + + static test_status chk(T *x, T *y, T *mx, T *my, cl_int *m, + const WorkGroupParams &test_params) + { + int ii, i, j, k, n; + int nw = test_params.local_workgroup_size; + int ns = test_params.subgroup_size; + int ng = test_params.global_workgroup_size; + int nj = (nw + ns - 1) / ns; + cl_int tr, rr; + int non_uniform_size = ng % nw; + ng = ng / nw; + if (non_uniform_size) ng++; + int last_subgroup_size = 0; + + for (k = 0; k < ng; ++k) + { // for each work_group + if (non_uniform_size && k == ng - 1) + { + set_last_workgroup_params(non_uniform_size, nj, ns, nw, + last_subgroup_size); + } + for (j = 0; j < nw; ++j) + { // inside the work_group + mx[j] = x[j]; // read host inputs for work_group + my[j] = y[j]; // read device outputs for work_group + } + + for (j = 0; j < nj; ++j) + { // for each subgroup + ii = j * ns; + if (last_subgroup_size && j == nj - 1) + { + n = last_subgroup_size; + } + else + { + n = ii + ns > nw ? nw - ii : ns; + } + + rr = 0; + if (operation == NonUniformVoteOp::all + || operation == NonUniformVoteOp::all_equal) + tr = 1; + if (operation == NonUniformVoteOp::any) tr = 0; + + std::set active_work_items; + for (i = 0; i < n; ++i) + { + if (test_params.work_items_mask.test(i)) + { + active_work_items.insert(i); + switch (operation) + { + case NonUniformVoteOp::elect: break; + + case NonUniformVoteOp::all: + tr &= + !compare_ordered(mx[ii + i], 0) ? 1 : 0; + break; + case NonUniformVoteOp::any: + tr |= + !compare_ordered(mx[ii + i], 0) ? 
1 : 0; + break; + case NonUniformVoteOp::all_equal: + tr &= compare_ordered( + mx[ii + i], + mx[ii + *active_work_items.begin()]) + ? 1 + : 0; + break; + default: + log_error("Unknown operation\n"); + return TEST_FAIL; + } + } + } + if (active_work_items.empty()) + { + continue; + } + auto lowest_active = active_work_items.begin(); + for (const int &active_work_item : active_work_items) + { + i = active_work_item; + if (operation == NonUniformVoteOp::elect) + { + i == *lowest_active ? tr = 1 : tr = 0; + } + + // normalize device values on host, non zero set 1. + rr = compare_ordered(my[ii + i], 0) ? 0 : 1; + + if (rr != tr) + { + log_error("ERROR: sub_group_%s() \n", + operation_names(operation)); + log_error("mismatch for work item %d sub group %d in " + "work group %d. Expected: %d Obtained: %d\n", + i, j, k, tr, rr); + return TEST_FAIL; + } + } + } + + x += nw; + y += nw; + m += 4 * nw; + } + + return TEST_PASS; + } +}; + +std::string sub_group_elect_source = R"( + __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { + int gid = get_global_id(0); + XY(xy,gid); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; + } + if (elect_work_item & work_item_mask){ + out[gid] = sub_group_elect(); + } + } +)"; + +std::string sub_group_non_uniform_any_all_all_equal_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { + int gid = get_global_id(0); + XY(xy,gid); + uint subgroup_local_id = get_sub_group_local_id(); + uint 
elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); + } + } +)"; + +template int run_vote_all_equal_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_non_uniform_all_equal"); + return error; +} +} + +int test_subgroup_functions_non_uniform_vote(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_non_uniform_vote")) + { + log_info("cl_khr_subgroup_non_uniform_vote is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 170; + constexpr size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size, 3); + test_params.save_kernel_source( + sub_group_non_uniform_any_all_all_equal_source); + test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect"); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); + + error |= rft.run_impl>( + "sub_group_non_uniform_all"); + error |= rft.run_impl>( + "sub_group_elect"); + error |= rft.run_impl>( + "sub_group_non_uniform_any"); + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp 
b/test_conformance/subgroups/test_subgroup_shuffle.cpp new file mode 100644 index 0000000000..56231cbfa9 --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp @@ -0,0 +1,65 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/typeWrappers.h" +#include + +namespace { + +template int run_shuffle_for_type(RunTestForType rft) +{ + int error = + rft.run_impl>("sub_group_shuffle"); + error |= rft.run_impl>( + "sub_group_shuffle_xor"); + return error; +} + +} + +int test_subgroup_functions_shuffle(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_shuffle")) + { + log_info("cl_khr_subgroup_shuffle is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + 
error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + error |= run_shuffle_for_type(rft); + + return error; +} diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp new file mode 100644 index 0000000000..caa1dccca9 --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp @@ -0,0 +1,67 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + +namespace { + +template int run_shuffle_relative_for_type(RunTestForType rft) +{ + int error = + rft.run_impl>("sub_group_shuffle_up"); + error |= rft.run_impl>( + "sub_group_shuffle_down"); + return error; +} + +} + +int test_subgroup_functions_shuffle_relative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_shuffle_relative")) + { + log_info("cl_khr_subgroup_shuffle_relative is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + error |= run_shuffle_relative_for_type(rft); + + return error; +} diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp deleted file mode 100644 index 779d30f680..0000000000 --- a/test_conformance/subgroups/test_workgroup.cpp +++ /dev/null @@ -1,727 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "procs.h" -#include "subhelpers.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - -static const char *any_source = "__kernel void test_any(const __global Type " - "*in, __global int2 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_any(in[gid]);\n" - "}\n"; - -static const char *all_source = "__kernel void test_all(const __global Type " - "*in, __global int2 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_all(in[gid]);\n" - "}\n"; - -static const char *bcast_source = - "__kernel void test_bcast(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " size_t loid = (size_t)((int)x % 100);\n" - " out[gid] = sub_group_broadcast(x, loid);\n" - "}\n"; - -static const char *redadd_source = - "__kernel void test_redadd(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_add(in[gid]);\n" - "}\n"; - -static const char *redmax_source = - "__kernel void test_redmax(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = 
sub_group_reduce_max(in[gid]);\n" - "}\n"; - -static const char *redmin_source = - "__kernel void test_redmin(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_min(in[gid]);\n" - "}\n"; - -static const char *scinadd_source = - "__kernel void test_scinadd(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" - "}\n"; - -static const char *scinmax_source = - "__kernel void test_scinmax(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" - "}\n"; - -static const char *scinmin_source = - "__kernel void test_scinmin(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" - "}\n"; - -static const char *scexadd_source = - "__kernel void test_scexadd(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" - "}\n"; - -static const char *scexmax_source = - "__kernel void test_scexmax(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" - "}\n"; - -static const char *scexmin_source = - "__kernel void test_scexmin(const __global Type *in, __global int2 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" - "}\n"; - - -// Any/All test functions -template struct AA -{ - static void gen(cl_int *x, 
cl_int *t, cl_int *m, int ns, int nw, int ng) - { - int i, ii, j, k, n; - int nj = (nw + ns - 1) / ns; - int e; - - ii = 0; - for (k = 0; k < ng; ++k) - { - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - e = (int)(genrand_int32(gMTdata) % 3); - - // Initialize data matrix indexed by local id and sub group id - switch (e) - { - case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break; - case 1: - memset(&t[ii], 0, n * sizeof(cl_int)); - i = (int)(genrand_int32(gMTdata) % (cl_uint)n); - t[ii + i] = 41; - break; - case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break; - } - } - - // Now map into work group using map from device - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; - } - - x += nw; - m += 2 * nw; - } - } - - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - int ns, int nw, int ng) - { - int ii, i, j, k, n; - int nj = (nw + ns - 1) / ns; - cl_int taa, raa; - - log_info(" sub_group_%s...\n", Which == 0 ? "any" : "all"); - - for (k = 0; k < ng; ++k) - { - // Map to array indexed to array indexed by local ID and sub group - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; - } - - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - // Compute target - if (Which == 0) - { - taa = 0; - for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0; - } - else - { - taa = 1; - for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0; - } - - // Check result - for (i = 0; i < n; ++i) - { - raa = my[ii + i] != 0; - if (raa != taa) - { - log_error("ERROR: sub_group_%s mismatch for local id " - "%d in sub group %d in group %d\n", - Which == 0 ? 
"any" : "all", i, j, k); - return -1; - } - } - } - - x += nw; - y += nw; - m += 2 * nw; - } - - return 0; - } -}; - -// Reduce functions -template struct RED -{ - static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) - { - int i, ii, j, k, n; - int nj = (nw + ns - 1) / ns; - - ii = 0; - for (k = 0; k < ng; ++k) - { - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - for (i = 0; i < n; ++i) - t[ii + i] = (Ty)( - (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1); - } - - // Now map into work group using map from device - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; - } - - x += nw; - m += 2 * nw; - } - } - - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, - int ng) - { - int ii, i, j, k, n; - int nj = (nw + ns - 1) / ns; - Ty tr, rr; - - log_info(" sub_group_reduce_%s(%s)...\n", - Which == 0 ? "add" : (Which == 1 ? "max" : "min"), - TypeName::val()); - - for (k = 0; k < ng; ++k) - { - // Map to array indexed to array indexed by local ID and sub group - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; - } - - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - // Compute target - if (Which == 0) - { - // add - tr = mx[ii]; - for (i = 1; i < n; ++i) tr += mx[ii + i]; - } - else if (Which == 1) - { - // max - tr = mx[ii]; - for (i = 1; i < n; ++i) - tr = tr > mx[ii + i] ? tr : mx[ii + i]; - } - else if (Which == 2) - { - // min - tr = mx[ii]; - for (i = 1; i < n; ++i) - tr = tr > mx[ii + i] ? mx[ii + i] : tr; - } - - // Check result - for (i = 0; i < n; ++i) - { - rr = my[ii + i]; - if (rr != tr) - { - log_error("ERROR: sub_group_reduce_%s(%s) mismatch for " - "local id %d in sub group %d in group %d\n", - Which == 0 ? "add" - : (Which == 1 ? 
"max" : "min"), - TypeName::val(), i, j, k); - return -1; - } - } - } - - x += nw; - y += nw; - m += 2 * nw; - } - - return 0; - } -}; - -// Scan Inclusive functions -template struct SCIN -{ - static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) - { - int i, ii, j, k, n; - int nj = (nw + ns - 1) / ns; - - ii = 0; - for (k = 0; k < ng; ++k) - { - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - for (i = 0; i < n; ++i) - // t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) - // % ns + 1); - t[ii + i] = (Ty)i; - } - - // Now map into work group using map from device - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; - } - - x += nw; - m += 2 * nw; - } - } - - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, - int ng) - { - int ii, i, j, k, n; - int nj = (nw + ns - 1) / ns; - Ty tr, rr; - - log_info(" sub_group_scan_inclusive_%s(%s)...\n", - Which == 0 ? "add" : (Which == 1 ? "max" : "min"), - TypeName::val()); - - for (k = 0; k < ng; ++k) - { - // Map to array indexed to array indexed by local ID and sub group - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; - } - - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - // Check result - for (i = 0; i < n; ++i) - { - if (Which == 0) - { - tr = i == 0 ? mx[ii] : tr + mx[ii + i]; - } - else if (Which == 1) - { - tr = i == 0 ? mx[ii] - : (tr > mx[ii + i] ? tr : mx[ii + i]); - } - else - { - tr = i == 0 ? mx[ii] - : (tr > mx[ii + i] ? mx[ii + i] : tr); - } - - rr = my[ii + i]; - if (rr != tr) - { - log_error( - "ERROR: sub_group_scan_inclusive_%s(%s) mismatch " - "for local id %d in sub group %d in group %d\n", - Which == 0 ? "add" : (Which == 1 ? 
"max" : "min"), - TypeName::val(), i, j, k); - return -1; - } - } - } - - x += nw; - y += nw; - m += 2 * nw; - } - - return 0; - } -}; - -// Scan Exclusive functions -template struct SCEX -{ - static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) - { - int i, ii, j, k, n; - int nj = (nw + ns - 1) / ns; - - ii = 0; - for (k = 0; k < ng; ++k) - { - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - for (i = 0; i < n; ++i) - t[ii + i] = (Ty)( - (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1); - } - - // Now map into work group using map from device - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; - } - - x += nw; - m += 2 * nw; - } - } - - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, - int ng) - { - int ii, i, j, k, n; - int nj = (nw + ns - 1) / ns; - Ty tr, trt, rr; - - log_info(" sub_group_scan_exclusive_%s(%s)...\n", - Which == 0 ? "add" : (Which == 1 ? "max" : "min"), - TypeName::val()); - - for (k = 0; k < ng; ++k) - { - // Map to array indexed to array indexed by local ID and sub group - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; - } - - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - - // Check result - for (i = 0; i < n; ++i) - { - if (Which == 0) - { - tr = i == 0 ? TypeIdentity::val() : tr + trt; - } - else if (Which == 1) - { - tr = i == 0 ? TypeIdentity::val() - : (trt > tr ? trt : tr); - } - else - { - tr = i == 0 ? TypeIdentity::val() - : (trt > tr ? tr : trt); - } - trt = mx[ii + i]; - rr = my[ii + i]; - - if (rr != tr) - { - log_error( - "ERROR: sub_group_scan_exclusive_%s(%s) mismatch " - "for local id %d in sub group %d in group %d\n", - Which == 0 ? "add" : (Which == 1 ? 
"max" : "min"), - TypeName::val(), i, j, k); - return -1; - } - } - } - - x += nw; - y += nw; - m += 2 * nw; - } - - return 0; - } -}; - -// Broadcast functios -template struct BC -{ - static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) - { - int i, ii, j, k, l, n; - int nj = (nw + ns - 1) / ns; - int d = ns > 100 ? 100 : ns; - - ii = 0; - for (k = 0; k < ng; ++k) - { - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? nw - ii : ns; - l = (int)(genrand_int32(gMTdata) & 0x7fffffff) - % (d > n ? n : d); - - for (i = 0; i < n; ++i) - t[ii + i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) - % 100 * 100 - + l); - } - - // Now map into work group using map from device - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - x[j] = t[i]; - } - - x += nw; - m += 2 * nw; - } - } - - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, - int ng) - { - int ii, i, j, k, l, n; - int nj = (nw + ns - 1) / ns; - Ty tr, rr; - - log_info(" sub_group_broadcast(%s)...\n", TypeName::val()); - - for (k = 0; k < ng; ++k) - { - // Map to array indexed to array indexed by local ID and sub group - for (j = 0; j < nw; ++j) - { - i = m[2 * j + 1] * ns + m[2 * j]; - mx[i] = x[j]; - my[i] = y[j]; - } - - for (j = 0; j < nj; ++j) - { - ii = j * ns; - n = ii + ns > nw ? 
nw - ii : ns; - l = (int)mx[ii] % 100; - tr = mx[ii + l]; - - // Check result - for (i = 0; i < n; ++i) - { - rr = my[ii + i]; - if (rr != tr) - { - log_error("ERROR: sub_group_broadcast(%s) mismatch for " - "local id %d in sub group %d in group %d\n", - TypeName::val(), i, j, k); - return -1; - } - } - } - - x += nw; - y += nw; - m += 2 * nw; - } - - return 0; - } -}; - -#define G 2000 -#define L 200 -struct run_for_type -{ - run_for_type(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - bool useCoreSubgroups) - { - device_ = device; - context_ = context; - queue_ = queue; - num_elements_ = num_elements; - useCoreSubgroups_ = useCoreSubgroups; - } - - template cl_int run() - { - cl_int error; - error = test, G, L>::run(device_, context_, queue_, - num_elements_, "test_bcast", - bcast_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_redadd", - redadd_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_redmax", - redmax_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_redmin", - redmin_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scinadd", - scinadd_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scinmax", - scinmax_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scinmin", - scinmin_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scexadd", - scexadd_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scexmax", - scexmax_source, 0, useCoreSubgroups_); - error |= test, G, L>::run( - device_, context_, queue_, num_elements_, "test_scexmin", - 
scexmin_source, 0, useCoreSubgroups_); - return error; - } - -private: - cl_device_id device_; - cl_context context_; - cl_command_queue queue_; - int num_elements_; - bool useCoreSubgroups_; -}; - -// Entry point from main -int test_work_group_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - bool useCoreSubgroups) -{ - int error; - error = test, G, L>::run(device, context, queue, num_elements, - "test_any", any_source, 0, - useCoreSubgroups); - error |= test, G, L>::run(device, context, queue, num_elements, - "test_all", all_source, 0, - useCoreSubgroups); - run_for_type rft(device, context, queue, num_elements, useCoreSubgroups); - error |= rft.run(); - error |= rft.run(); - error |= rft.run(); - error |= rft.run(); - error |= rft.run(); - error |= rft.run(); - // error |= rft.run(); - - return error; -} - -int test_work_group_functions_core(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_work_group_functions(device, context, queue, num_elements, - true); -} - -int test_work_group_functions_ext(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); - - if (!hasExtension) - { - log_info( - "Device does not support 'cl_khr_subgroups'. 
Skipping the test.\n"); - return TEST_SKIPPED_ITSELF; - } - return test_work_group_functions(device, context, queue, num_elements, - false); -} diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index b77bfe1af6..7ffa6a7c39 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -227,9 +227,8 @@ int test_work_item_functions(cl_device_id device, cl_context context, "}"; const std::string &kernel_str = kernel_sstr.str(); const char *kernel_src = kernel_str.c_str(); - error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, "get_test", - "-cl-std=CL2.0"); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "get_test"); if (error != 0) return error; error = get_max_allowed_work_group_size(context, kernel, &local, NULL); diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt index 9d276a62e2..ff62483752 100644 --- a/test_conformance/submission_details_template.txt +++ b/test_conformance/submission_details_template.txt @@ -81,6 +81,12 @@ Platform Version: # Tests version: +# Commit SHAs (7-digit) of any cherry-picked patches subsequent to tagged +# version. Any patches included must apply without conflicts to the tagged +# version in the order listed. +# +Patches: + # Implementations that support cl_khr_icd are required to use a loader to run # the tests and document the loader that was used. 
# diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp index 2f6ad187a0..089bad2f3a 100644 --- a/test_conformance/vectors/test_step.cpp +++ b/test_conformance/vectors/test_step.cpp @@ -172,6 +172,8 @@ int test_step_internal(cl_device_id deviceID, cl_context context, destroyClState(pClState); return -1; } + + clStateDestroyProgramAndKernel(pClState); } } diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt index 088860868a..c90bef8858 100644 --- a/test_conformance/workgroups/CMakeLists.txt +++ b/test_conformance/workgroups/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_NAME}_SOURCES test_wg_scan_inclusive_add.cpp test_wg_scan_inclusive_min.cpp test_wg_scan_inclusive_max.cpp + test_wg_suggested_local_work_size.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp index 41ffa74192..abb1145b3c 100644 --- a/test_conformance/workgroups/main.cpp +++ b/test_conformance/workgroups/main.cpp @@ -24,27 +24,30 @@ #endif test_definition test_list[] = { - ADD_TEST(work_group_all), - ADD_TEST(work_group_any), - ADD_TEST(work_group_reduce_add), - ADD_TEST(work_group_reduce_min), - ADD_TEST(work_group_reduce_max), - ADD_TEST(work_group_scan_inclusive_add), - ADD_TEST(work_group_scan_inclusive_min), - ADD_TEST(work_group_scan_inclusive_max), - ADD_TEST(work_group_scan_exclusive_add), - ADD_TEST(work_group_scan_exclusive_min), - ADD_TEST(work_group_scan_exclusive_max), - ADD_TEST(work_group_broadcast_1D), - ADD_TEST(work_group_broadcast_2D), - ADD_TEST(work_group_broadcast_3D), + ADD_TEST_VERSION(work_group_all, Version(2, 0)), + ADD_TEST_VERSION(work_group_any, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_add, Version(2, 0)), + 
ADD_TEST_VERSION(work_group_scan_inclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_1D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_2D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_3D, Version(2, 0)), + ADD_TEST(work_group_suggested_local_size_1D), + ADD_TEST(work_group_suggested_local_size_2D), + ADD_TEST(work_group_suggested_local_size_3D) }; const int test_num = ARRAY_SIZE(test_list); test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); - auto expected_min_version = Version(2, 0); + auto expected_min_version = Version(1, 2); if (version < expected_min_version) { version_expected_info("Test", "OpenCL", diff --git a/test_conformance/workgroups/procs.h b/test_conformance/workgroups/procs.h index 2e6e79e262..6143d52531 100644 --- a/test_conformance/workgroups/procs.h +++ b/test_conformance/workgroups/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2017, 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -16,6 +16,7 @@ #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/errorHelpers.h" +#include "harness/typeWrappers.h" #include "harness/conversions.h" #include "harness/mt19937.h" @@ -36,3 +37,16 @@ extern int test_work_group_scan_exclusive_max(cl_device_id deviceID, cl_context extern int test_work_group_scan_inclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp index 2148fba7bd..ccf17b6e4f 100644 --- a/test_conformance/workgroups/test_wg_all.cpp +++ b/test_conformance/workgroups/test_wg_all.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -79,7 +79,8 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_all_kernel_code, "test_wg_all", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_all_kernel_code, "test_wg_all"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp index 35ce0d524f..4785ad5136 100644 --- a/test_conformance/workgroups/test_wg_any.cpp +++ b/test_conformance/workgroups/test_wg_any.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -79,7 +79,8 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_any_kernel_code, "test_wg_any", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_any_kernel_code, "test_wg_any"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index 3da14fb5a9..29380211a4 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -174,7 +176,9 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_1D_kernel_code, "test_wg_broadcast_1D", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_broadcast_1D_kernel_code, + "test_wg_broadcast_1D"); if (err) return -1; @@ -281,7 +285,9 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_2D_kernel_code, "test_wg_broadcast_2D", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_broadcast_2D_kernel_code, + "test_wg_broadcast_2D"); if (err) return -1; @@ -306,7 +312,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 16); + num_workgroups = std::max(n_elems / wg_size[0], (size_t)16); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; num_elements = globalsize[0] * globalsize[1]; @@ -406,7 +412,9 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_3D_kernel_code, "test_wg_broadcast_3D", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_broadcast_3D_kernel_code, + "test_wg_broadcast_3D"); if (err) return -1; @@ -431,7 +439,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = localsize[2] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 8); + num_workgroups = 
std::max(n_elems / wg_size[0], (size_t)8); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; globalsize[2] = num_workgroups * localsize[2]; diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp index 5da7284acb..eb26f4985d 100644 --- a/test_conformance/workgroups/test_wg_reduce.cpp +++ b/test_conformance/workgroups/test_wg_reduce.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -176,7 +176,9 @@ test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_comma int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_int, "test_wg_reduce_add_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_add_kernel_code_int, + "test_wg_reduce_add_int"); if (err) return -1; @@ -279,7 +281,9 @@ test_work_group_reduce_add_uint(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_uint, "test_wg_reduce_add_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_add_kernel_code_uint, + "test_wg_reduce_add_uint"); if (err) return -1; @@ -381,7 +385,9 @@ test_work_group_reduce_add_long(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_long, "test_wg_reduce_add_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_add_kernel_code_long, + "test_wg_reduce_add_long"); if (err) return -1; @@ -484,7 
+490,9 @@ test_work_group_reduce_add_ulong(cl_device_id device, cl_context context, cl_com int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_ulong, "test_wg_reduce_add_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_add_kernel_code_ulong, + "test_wg_reduce_add_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_reduce_max.cpp b/test_conformance/workgroups/test_wg_reduce_max.cpp index 2464beda00..3bbd3f25bf 100644 --- a/test_conformance/workgroups/test_wg_reduce_max.cpp +++ b/test_conformance/workgroups/test_wg_reduce_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -177,7 +177,9 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_int, "test_wg_reduce_max_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_max_kernel_code_int, + "test_wg_reduce_max_int"); if (err) return -1; @@ -289,7 +291,9 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_uint, "test_wg_reduce_max_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_max_kernel_code_uint, + "test_wg_reduce_max_uint"); if (err) return -1; @@ -400,7 +404,9 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, 
&program, &kernel, 1, &wg_reduce_max_kernel_code_long, "test_wg_reduce_max_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_max_kernel_code_long, + "test_wg_reduce_max_long"); if (err) return -1; @@ -512,7 +518,9 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_ulong, "test_wg_reduce_max_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_max_kernel_code_ulong, + "test_wg_reduce_max_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_reduce_min.cpp b/test_conformance/workgroups/test_wg_reduce_min.cpp index f415aa74e2..7b1b22e88c 100644 --- a/test_conformance/workgroups/test_wg_reduce_min.cpp +++ b/test_conformance/workgroups/test_wg_reduce_min.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -177,7 +177,9 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_min_kernel_code_int, + "test_wg_reduce_min_int"); if (err) return -1; @@ -289,7 +291,9 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_uint, "test_wg_reduce_min_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_min_kernel_code_uint, + "test_wg_reduce_min_uint"); if (err) return -1; @@ -400,7 +404,9 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_long, "test_wg_reduce_min_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_min_kernel_code_long, + "test_wg_reduce_min_long"); if (err) return -1; @@ -512,7 +518,9 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_ulong, "test_wg_reduce_min_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_reduce_min_kernel_code_ulong, + "test_wg_reduce_min_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp index 07eedc1616..e695a16545 100644 --- 
a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -184,7 +184,9 @@ test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_int, "test_wg_scan_exclusive_add_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_add_kernel_code_int, + "test_wg_scan_exclusive_add_int"); if (err) return -1; @@ -287,7 +289,9 @@ test_work_group_scan_exclusive_add_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_uint, "test_wg_scan_exclusive_add_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_add_kernel_code_uint, + "test_wg_scan_exclusive_add_uint"); if (err) return -1; @@ -389,7 +393,9 @@ test_work_group_scan_exclusive_add_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_long, "test_wg_scan_exclusive_add_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_add_kernel_code_long, + "test_wg_scan_exclusive_add_long"); if (err) return -1; @@ -492,7 +498,9 @@ test_work_group_scan_exclusive_add_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, 
&wg_scan_exclusive_add_kernel_code_ulong, "test_wg_scan_exclusive_add_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_add_kernel_code_ulong, + "test_wg_scan_exclusive_add_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp index d20a31994d..644b3ccf31 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include "procs.h" const char *wg_scan_exclusive_max_kernel_code_int = "__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" @@ -79,7 +80,7 @@ verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -103,7 +104,7 @@ verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -127,7 +128,7 @@ verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ 
-151,7 +152,7 @@ verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -176,7 +177,9 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_int, "test_wg_scan_exclusive_max_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_max_kernel_code_int, + "test_wg_scan_exclusive_max_int"); if (err) return -1; @@ -288,7 +291,9 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_uint, "test_wg_scan_exclusive_max_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_max_kernel_code_uint, + "test_wg_scan_exclusive_max_uint"); if (err) return -1; @@ -399,7 +404,9 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_long, "test_wg_scan_exclusive_max_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_max_kernel_code_long, + "test_wg_scan_exclusive_max_long"); if (err) return -1; @@ -511,7 +518,9 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_ulong, "test_wg_scan_exclusive_max_ulong", 
"-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_max_kernel_code_ulong, + "test_wg_scan_exclusive_max_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp index eb99796059..3c6dfc8755 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include "procs.h" const char *wg_scan_exclusive_min_kernel_code_int = "__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" @@ -80,7 +81,7 @@ verify_wg_scan_exclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -104,7 +105,7 @@ verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -128,7 +129,7 @@ verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -152,7 +153,7 @@ verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, 
log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -177,7 +178,9 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_int, "test_wg_scan_exclusive_min_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_min_kernel_code_int, + "test_wg_scan_exclusive_min_int"); if (err) return -1; @@ -289,7 +292,9 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_uint, "test_wg_scan_exclusive_min_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_min_kernel_code_uint, + "test_wg_scan_exclusive_min_uint"); if (err) return -1; @@ -400,7 +405,9 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_long, "test_wg_scan_exclusive_min_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_exclusive_min_kernel_code_long, + "test_wg_scan_exclusive_min_long"); if (err) return -1; @@ -512,7 +519,9 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_ulong, "test_wg_scan_exclusive_min_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + 
&wg_scan_exclusive_min_kernel_code_ulong, + "test_wg_scan_exclusive_min_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp index bff0b0f770..51c98a4e7b 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -173,7 +173,9 @@ test_work_group_scan_inclusive_add_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_int, "test_wg_scan_inclusive_add_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_add_kernel_code_int, + "test_wg_scan_inclusive_add_int"); if (err) return -1; @@ -276,7 +278,9 @@ test_work_group_scan_inclusive_add_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_uint, "test_wg_scan_inclusive_add_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_add_kernel_code_uint, + "test_wg_scan_inclusive_add_uint"); if (err) return -1; @@ -378,7 +382,9 @@ test_work_group_scan_inclusive_add_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_long, "test_wg_scan_inclusive_add_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_add_kernel_code_long, + 
"test_wg_scan_inclusive_add_long"); if (err) return -1; @@ -481,7 +487,9 @@ test_work_group_scan_inclusive_add_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_ulong, "test_wg_scan_inclusive_add_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_add_kernel_code_ulong, + "test_wg_scan_inclusive_add_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp index c2455e9cf6..2a2e230e23 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_max_uint(unsigned int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -175,7 +177,9 @@ test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_int, "test_wg_scan_inclusive_max_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + 
&wg_scan_inclusive_max_kernel_code_int, + "test_wg_scan_inclusive_max_int"); if (err) return -1; @@ -278,7 +282,9 @@ test_work_group_scan_inclusive_max_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_uint, "test_wg_scan_inclusive_max_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_max_kernel_code_uint, + "test_wg_scan_inclusive_max_uint"); if (err) return -1; @@ -380,7 +386,9 @@ test_work_group_scan_inclusive_max_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_long, "test_wg_scan_inclusive_max_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_max_kernel_code_long, + "test_wg_scan_inclusive_max_long"); if (err) return -1; @@ -483,7 +491,9 @@ test_work_group_scan_inclusive_max_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_ulong, "test_wg_scan_inclusive_max_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_max_kernel_code_ulong, + "test_wg_scan_inclusive_max_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp index a73c35a6df..adbdad56f1 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -175,7 +177,9 @@ test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, 
&wg_scan_inclusive_min_kernel_code_int, "test_wg_scan_inclusive_min_int", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_min_kernel_code_int, + "test_wg_scan_inclusive_min_int"); if (err) return -1; @@ -278,7 +282,9 @@ test_work_group_scan_inclusive_min_uint(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_uint, "test_wg_scan_inclusive_min_uint", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_min_kernel_code_uint, + "test_wg_scan_inclusive_min_uint"); if (err) return -1; @@ -380,7 +386,9 @@ test_work_group_scan_inclusive_min_long(cl_device_id device, cl_context context, int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_long, "test_wg_scan_inclusive_min_long", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_min_kernel_code_long, + "test_wg_scan_inclusive_min_long"); if (err) return -1; @@ -483,7 +491,9 @@ test_work_group_scan_inclusive_min_ulong(cl_device_id device, cl_context context int i; MTdata d; - err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_ulong, "test_wg_scan_inclusive_min_ulong", "-cl-std=CL2.0" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &wg_scan_inclusive_min_kernel_code_ulong, + "test_wg_scan_inclusive_min_ulong"); if (err) return -1; diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp new file mode 100644 index 0000000000..aa02391c5d --- /dev/null +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -0,0 +1,611 @@ +// +// Copyright (c) 2021 The 
Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include + +/** @brief Gets the number of elements of type s in a fixed length array of s */ +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +#define test_error_ret_and_free(errCode, msg, retValue, ptr) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + print_error(errCodeResult, msg); \ + free(ptr); \ + return retValue; \ + } \ + } + +const char* wg_scan_local_work_group_size = R"( + bool is_zero_linear_id() + { + size_t linear_id; +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + + ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) + + (get_global_id(0) - get_global_offset(0)); +#else + linear_id = get_global_linear_id(); +#endif + return linear_id == 0; + } + + uint get_l_size(size_t dim) + { +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + return get_local_size(dim); +#else + return get_enqueued_local_size(dim); +#endif + } + + __kernel void test_wg_scan_local_work_group_size(global uint *output) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_static_local( + global uint *output) + { + __local char c[LOCAL_MEM_SIZE]; + + 
if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_dynlocal( + global uint *output, + __local char * c) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + };)"; + +bool is_prime(size_t a) +{ + size_t c; + + for (c = 2; c < a; c++) + { + if (a % c == 0) return false; + } + return true; +} + +bool is_not_prime(size_t a) { return !is_prime(a); } + +bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); } + +bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); } + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +/* The numbers we chose in the value_range are to be used for the second and + third dimension of the global work group size. The numbers below cover many + different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12 + is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range[] = { 1024, 3, 12, 1031, 1 }; +/* The value_range_nD contains numbers to be used for the experiments with 2D + and 3D global work sizes. This is because we need smaller numbers so that the + resulting number of work items is meaningful and does not become too large. 
+ The cases here are: 64 that is a power of 2, 3 is an odd and small prime + number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime + number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range_nD[] = { 64, 3, 12, 113, 1 }; +const size_t basic_increment = 16; +const size_t primes_increment = 1; +enum num_dims +{ + _1D = 1, + _2D = 2, + _3D = 3 +}; + +int do_test(cl_device_id device, cl_context context, cl_command_queue queue, + cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3], + size_t test_values[3], size_t dyn_mem_size) +{ + size_t local_work_size[] = { 1, 1, 1 }; + size_t suggested_total_size; + size_t workgroupinfo_size; + cl_uint kernel_work_size[3] = { 0 }; + clMemWrapper buffer; + cl_platform_id platform; + + int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), + &platform, NULL); + test_error_ret(err, "clGetDeviceInfo failed", -1); + clGetKernelSuggestedLocalWorkSizeKHR_fn + clGetKernelSuggestedLocalWorkSizeKHR = + (clGetKernelSuggestedLocalWorkSizeKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSuggestedLocalWorkSizeKHR"); + + if (clGetKernelSuggestedLocalWorkSizeKHR == NULL) + { + log_info("Extension 'cl_khr_suggested_local_work_size' could not be " + "found.\n"); + return TEST_FAIL; + } + + /* Create the actual buffer, using local_buffer as the host pointer, and ask + * to copy that into the buffer */ + buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(kernel_work_size), NULL, &err); + test_error_ret(err, "clCreateBuffer failed", -1); + err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer); + test_error_ret(err, "clSetKernelArg failed", -1); + if (dyn_mem_size) + { + err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL); + test_error_ret(err, "clSetKernelArg failed", -1); + } + err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim, + global_work_offset, test_values, + local_work_size); + 
test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1); + suggested_total_size = + local_work_size[0] * local_work_size[1] * local_work_size[2]; + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workgroupinfo_size), &workgroupinfo_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (suggested_total_size > workgroupinfo_size) + { + std::cout << "The suggested work group size consist of " + << suggested_total_size << " work items.\n" + << "Work items are limited by " << workgroupinfo_size + << std::endl; + std::cout << "Size from clGetKernelWorkGroupInfo: " + << workgroupinfo_size; + std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: " + << local_work_size[0] * local_work_size[1] + * local_work_size[2] + << std::endl; + return -1; + } + + err = + clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset, + test_values, // global work size + NULL, 0, NULL, NULL); + test_error_ret(err, "clEnqueueNDRangeKernel failed", -1); + err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0, + sizeof(kernel_work_size), kernel_work_size, 0, + NULL, NULL); + test_error_ret(err, "clEnqueueReadBuffer failed", -1); + err = clFinish(queue); + test_error_ret(err, "clFinish failed", -1); + + if (kernel_work_size[0] != local_work_size[0] + || kernel_work_size[1] != local_work_size[1] + || kernel_work_size[2] != local_work_size[2]) + { + std::cout + << "Kernel work size differs from local work size suggested:\n" + << "Kernel work size: (" << kernel_work_size[0] << ", " + << kernel_work_size[1] << ", " << kernel_work_size[2] << ")" + << "Local work size: (" << local_work_size[0] << ", " + << local_work_size[1] << ", " << local_work_size[2] << ")\n"; + return -1; + } + return err; +} + +int do_test_work_group_suggested_local_size( + cl_device_id device, cl_context context, cl_command_queue queue, + bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, + cl_long 
max_local_mem_size, size_t global_work_offset[], num_dims dim) +{ + clProgramWrapper scan_program; + clKernelWrapper scan_kernel; + int err; + size_t test_values[] = { 1, 1, 1 }; + std::string kernel_names[6] = { + "test_wg_scan_local_work_group_size", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_dynlocal" + }; + std::string str_local_mem_size[6] = { + "-DLOCAL_MEM_SIZE=1", "-DLOCAL_MEM_SIZE=1024", + "-DLOCAL_MEM_SIZE=4096", "-DLOCAL_MEM_SIZE=16384", + "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1" + }; + size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 }; + size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 }; + cl_ulong kernel_local_mem_size; + for (int kernel_num = 0; kernel_num < 6; kernel_num++) + { + if (max_local_mem_size < local_mem_size[kernel_num]) continue; + // Create the kernel + err = create_single_kernel_helper( + context, &scan_program, &scan_kernel, 1, + &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(), + (str_local_mem_size[kernel_num]).c_str()); + test_error_ret(err, + ("create_single_kernel_helper failed for kernel " + + kernel_names[kernel_num]) + .c_str(), + -1); + + // Check if the local memory used by the kernel is going to exceed the + // max_local_mem_size + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (kernel_local_mem_size > max_local_mem_size) continue; + // return error if no number is found due to the skip condition + err = -1; + unsigned int j = 0; + size_t num_elems = NELEMS(value_range); + for (size_t i = start; i < end; i += incr) + { + if (skip_cond(i)) continue; + err = 0; + test_values[0] = i; + if (dim == _2D) test_values[1] = 
value_range_nD[j++ % num_elems]; + if (dim == _3D) + { + test_values[1] = value_range_nD[j++ % num_elems]; + test_values[2] = value_range_nD[rand() % num_elems]; + } + err |= do_test(device, context, queue, scan_kernel, dim, + global_work_offset, test_values, + dyn_mem_size[kernel_num]); + test_error_ret( + err, + ("do_test failed for kernel " + kernel_names[kernel_num]) + .c_str(), + -1); + } + } + return err; +} + +int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_1D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_1D evens passed\n"); + + // 
primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_1D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue 
queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_2D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_2D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_2D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // 
odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. 
" + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_3D odds passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_3D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_3D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, 
end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D primes with " + "global_work_offset passed\n"); + + return err; +} diff --git a/test_extensions/CMakeLists.txt b/test_extensions/CMakeLists.txt deleted file mode 100644 index 3c48e18699..0000000000 --- a/test_extensions/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -set(HARNESS_LIB harness) -add_subdirectory( media_sharing ) diff --git a/test_extensions/media_sharing/main.cpp b/test_extensions/media_sharing/main.cpp deleted file mode 100644 index f0c3aff8ac..0000000000 --- a/test_extensions/media_sharing/main.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include -#include - -#include "harness/testHarness.h" -#include "utils.h" -#include "procs.h" - - -test_definition test_list[] = { -ADD_TEST( context_create ), -ADD_TEST( get_device_ids ), -ADD_TEST( api ), -ADD_TEST( kernel ), -ADD_TEST( other_data_types ), -ADD_TEST( memory_access ), -ADD_TEST( interop_user_sync ) -}; - -const int test_num = ARRAY_SIZE(test_list); - -clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR = NULL; -clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL; -clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL; -clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL; - -cl_platform_id gPlatformIDdetected; -cl_device_id gDeviceIDdetected; -cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; - -bool MediaSurfaceSharingExtensionInit() -{ - clGetDeviceIDsFromDX9MediaAdapterKHR = (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR"); - if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL) - { - log_error("clGetExtensionFunctionAddressForPlatform(clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n"); - return false; - } - - clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR"); - if (clCreateFromDX9MediaSurfaceKHR == NULL) - { - log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromDX9MediaSurfaceKHR) returned 
NULL.\n"); - return false; - } - - clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR"); - if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL) - { - log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n"); - return false; - } - - clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR"); - if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL) - { - log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n"); - return false; - } - - return true; -} - -bool DetectPlatformAndDevice() -{ - std::vector platforms; - cl_uint platformsNum = 0; - cl_int error = clGetPlatformIDs(0, 0, &platformsNum); - if (error != CL_SUCCESS) - { - print_error(error, "clGetPlatformIDs failed\n"); - return false; - } - - platforms.resize(platformsNum); - error = clGetPlatformIDs(platformsNum, &platforms[0], 0); - if (error != CL_SUCCESS) - { - print_error(error, "clGetPlatformIDs failed\n"); - return false; - } - - bool found = false; - for (size_t i = 0; i < platformsNum; ++i) - { - std::vector devices; - cl_uint devicesNum = 0; - error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0, &devicesNum); - if (error != CL_SUCCESS) - { - print_error(error, "clGetDeviceIDs failed\n"); - return false; - } - - devices.resize(devicesNum); - error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum, &devices[0], 0); - if (error != CL_SUCCESS) - { - print_error(error, "clGetDeviceIDs failed\n"); - return false; - } - - for (size_t j = 0; j < devicesNum; ++j) - { - if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing")) - { - gPlatformIDdetected = platforms[i]; - gDeviceIDdetected = devices[j]; - found = true; - break; - } - } - } - - if 
(!found) - { - log_info("Test was not run, because the media surface sharing extension is not supported for any devices.\n"); - return false; - } - - return true; -} - -bool CmdlineParse(int argc, const char *argv[]) -{ - char *env_mode = getenv( "CL_DEVICE_TYPE" ); - if( env_mode != NULL ) - { - if(strcmp(env_mode, "gpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) - gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; - else if(strcmp(env_mode, "cpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0) - gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; - else if(strcmp(env_mode, "accelerator") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0) - gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; - else if(strcmp(env_mode, "default") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0) - gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; - else - { - log_error("Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode); - return false; - } - } - - for (int i = 0; i < argc; ++i) - { - if(strcmp(argv[i], "gpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0) - { - gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; - continue; - } - else if(strcmp( argv[i], "cpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0) - { - gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; - continue; - } - else if(strcmp( argv[i], "accelerator") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0) - { - gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; - continue; - } - else if(strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0) - { - gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; - continue; - } - else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - { - CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW); - } - } - - return true; -} - -int main(int argc, const char *argv[]) -{ - if (!CmdlineParse(argc, argv)) - return TEST_FAIL; - - if (!DetectPlatformAndDevice()) - { - log_info("Test was not run, because the media surface sharing extension is 
not supported\n"); - return TEST_SKIP; - } - - if (!MediaSurfaceSharingExtensionInit()) - return TEST_FAIL; - - return runTestHarness(argc, argv, test_num, test_list, true, 0); -} diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp deleted file mode 100644 index 5637bc5423..0000000000 --- a/test_extensions/media_sharing/test_create_context.cpp +++ /dev/null @@ -1,318 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "utils.h" - -int context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, - int num_elements, unsigned int width, unsigned int height, - TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) -{ - CResult result; - - //create device - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - //generate input data - std::vector bufferIn(width * height * 3 / 2, 0); - if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - void *objectSharedHandle = 0; - std::auto_ptr surface; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, - (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSharedHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - 0, - }; - - clContextWrapper ctx; - switch(functionCreate) - { - case CONTEXT_CREATE_DEFAULT: - ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - break; - case CONTEXT_CREATE_FROM_TYPE: - ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error); - break; - default: - log_error("Unknown context creation function enum\n"); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - break; - } - - if (error != CL_SUCCESS) - { - std::string functionName; - FunctionContextCreateToString(functionCreate, functionName); - log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, 
IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) - { - log_error("Image info verification failed\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - cl_event event; - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), - &memObjList.at(0), 0, NULL, &event); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - cl_uint eventType = 0; - error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); - if (error != CL_SUCCESS) - { - log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - if(eventType != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) - { - log_error("Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - clReleaseEvent(event); - - size_t origin[3] = {0,0,0}; - size_t offset = 0; - size_t frameSize = width * height * 3 / 2; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) - { - log_error("OCL object verification failed - clEnqueueReadImage\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), - &memObjList.at(0), 0, NULL, &event); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - eventType = 0; - error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); - if (error != CL_SUCCESS) - { - log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - if(eventType != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR) - { - log_error("Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - clReleaseEvent(event); - - //object verification - std::vector bufferOut(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) - { - log_error("Media surface is different than expected\n"); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - 
result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - const unsigned int WIDTH = 256; - const unsigned int HEIGHT = 256; - - std::vector adapterTypes; -#if defined(_WIN32) - adapterTypes.push_back(CL_ADAPTER_D3D9_KHR); - adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR); - adapterTypes.push_back(CL_ADAPTER_DXVA_KHR); -#endif - - std::vector contextFuncs; - contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); - contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); - - std::vector formats; - formats.push_back(SURFACE_FORMAT_NV12); - formats.push_back(SURFACE_FORMAT_YV12); - - std::vector sharedHandleTypes; - sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); -#if defined(_WIN32) - sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); -#endif - - CResult result; - for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size(); ++adapterTypeIdx) - { - //iteration through all create context functions - for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx) - { - //iteration through surface formats - for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) - { - //shared handle enabled or disabled - for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx) - { - if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED) - continue; - - if(context_create(deviceID, context, queue, num_elements, WIDTH, HEIGHT, - contextFuncs[contextFuncIdx], adapterTypes[adapterTypeIdx], formats[formatIdx], - sharedHandleTypes[sharedHandleIdx]) != 0) - { - std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? 
"shared handle": "no shared handle"; - std::string formatStr; - std::string adapterTypeStr; - SurfaceFormatToString(formats[formatIdx], formatStr); - AdapterToString(adapterTypes[adapterTypeIdx], adapterTypeStr); - - log_error("\nTest case - clCreateContext (%s, %s, %s) failed\n\n", adapterTypeStr.c_str(), formatStr.c_str(), sharedHandle.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - } - } - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp deleted file mode 100644 index cdc6ce860d..0000000000 --- a/test_extensions/media_sharing/test_functions_api.cpp +++ /dev/null @@ -1,617 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "utils.h" - -int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, - unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) -{ - const unsigned int FRAME_NUM = 2; - const cl_uchar MAX_VALUE = 255 / 2; - CResult result; - - //create device - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - //generate input and expected data - std::vector > bufferRef1(FRAME_NUM); - std::vector > bufferRef2(FRAME_NUM); - std::vector > bufferRef3(FRAME_NUM); - size_t frameSize = width * height * 3 / 2; - cl_uchar step = MAX_VALUE / FRAME_NUM; - for (size_t i = 0; i < FRAME_NUM; ++i) - { - if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height, static_cast(step * i), static_cast(step * (i + 1))) || - !YUVGenerate(surfaceFormat, bufferRef2[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.2) || - !YUVGenerate(surfaceFormat, bufferRef3[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.4)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - } - - //iterates through all devices - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? 
"yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - void *objectSharedHandle = 0; - std::auto_ptr surface; - - //create surface - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, - (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - 0, - }; - - clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateContext failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); - 
result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) - { - log_error("Image info verification failed\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) - { - if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef1[frameIdx % FRAME_NUM], width, height)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - { //read operation - std::vector out( frameSize, 0 ); - size_t offset = 0; - size_t origin[3] = {0,0,0}; - - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, - &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL image is different then shared OCL object: clEnqueueReadImage\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //write operation - size_t offset = 0; - size_t origin[3] = {0,0,0}; - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, - 0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - } - - { //read operation - std::vector out( frameSize, 0 ); - size_t offset = 0; - size_t origin[3] = {0,0,0}; - - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, - &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, Shared OCL image verification after clEnqueueWriteImage failed\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //copy operation (shared OCL to OCL) - size_t offset = 0; - size_t origin[3] = {0,0,0}; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - cl_image_format formatPlane; - formatPlane.image_channel_data_type = CL_UNORM_INT8; - formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
CL_RG: CL_R; - - cl_image_desc imageDesc = {0}; - imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; - imageDesc.image_width = planeWidth; - imageDesc.image_height = planeHeight; - - clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane, &imageDesc, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueCopyImage(cmdQueue, memObjList[i], planeOCL, origin, origin, regionPlane, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from shared OCL to OCL) failed\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //copy operation (OCL to shared OCL) - size_t offset = 0; - size_t origin[3] = {0,0,0}; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth) * sizeof(cl_uchar); - - cl_image_format formatPlane; - formatPlane.image_channel_data_type = CL_UNORM_INT8; - formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
CL_RG: CL_R; - - cl_image_desc imageDesc = {0}; - imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; - imageDesc.image_width = planeWidth; - imageDesc.image_height = planeHeight; - imageDesc.image_row_pitch = pitchSize; - - clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc, &bufferRef1[frameIdx % FRAME_NUM][offset], &error); - if (error != CL_SUCCESS) - { - log_error("clCreateImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueCopyImage(cmdQueue, planeOCL, memObjList[i], origin, origin, regionPlane, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from OCL to shared OCL) failed\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //copy from image to buffer - size_t offset = 0; - size_t origin[3] = {0,0,0}; - size_t bufferSize = sizeof(cl_uchar) * frameSize; - clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueCopyImageToBuffer(cmdQueue, memObjList[i], buffer, origin, regionPlane, offset, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueCopyImageToBuffer failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight * sizeof(cl_uchar); - } - - std::vector out( frameSize, 0 ); - error = clEnqueueReadBuffer( cmdQueue, buffer, CL_TRUE, 0, bufferSize, &out[0], 0, NULL, NULL ); - if (error != CL_SUCCESS) - { - log_error("Unable to read buffer"); - result.ResultSub(CResult::TEST_FAIL); - } - - if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL buffer verification after clEnqueueCopyImageToBuffer (from shared OCL image to OCL buffer) failed\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //copy buffer to image - size_t bufferSize = sizeof(cl_uchar) * frameSize; - clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_COPY_HOST_PTR, bufferSize, &bufferRef2[frameIdx % FRAME_NUM][0], &error); - if (error != CL_SUCCESS) - { - log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - size_t offset = 0; - size_t origin[3] = {0,0,0}; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueCopyBufferToImage(cmdQueue, buffer, memObjList[i], offset, origin, regionPlane, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueCopyBufferToImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight * sizeof(cl_uchar); - } - - if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL image verification after clEnqueueCopyBufferToImage (from OCL buffer to shared OCL image) failed\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //map operation to read - size_t offset = 0; - size_t origin[3] = {0,0,0}; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
width: planeWidth); - - size_t rowPitch = 0; - size_t slicePitch = 0; - void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin, regionPlane, - &rowPitch, &slicePitch, 0, 0, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t y = 0; y < planeHeight; ++y) - memcpy(&out[offset + y * pitchSize], static_cast(mapPtr) + y * rowPitch / sizeof(cl_uchar), pitchSize * sizeof(cl_uchar)); - - error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += pitchSize * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, Mapped shared OCL image is different then expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //map operation to write - size_t offset = 0; - size_t origin[3] = {0,0,0}; - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
width: planeWidth); - - size_t rowPitch = 0; - size_t slicePitch = 0; - void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin, regionPlane, - &rowPitch, &slicePitch, 0, 0, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t y = 0; y < planeHeight; ++y) - memcpy(static_cast(mapPtr) + y * rowPitch / sizeof(cl_uchar), &bufferRef3[frameIdx % FRAME_NUM][offset + y * pitchSize], pitchSize * sizeof(cl_uchar)); - - error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += pitchSize * planeHeight; - } - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - std::vector bufferOut(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut, bufferRef3[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, media surface is different than expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - 
-int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - CResult result; - -#if defined(_WIN32) - //D3D9 - if(api_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 3, 512, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //D3D9EX - if(api_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //DXVA - if(api_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); 
- result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - -#else - return TEST_NOT_IMPLEMENTED; -#endif - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp deleted file mode 100644 index f5c3e2daf3..0000000000 --- a/test_extensions/media_sharing/test_functions_kernel.cpp +++ /dev/null @@ -1,446 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include -#include - -#include "harness/errorHelpers.h" -#include "harness/kernelHelpers.h" - -#include "utils.h" - -int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, - unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) -{ - const unsigned int FRAME_NUM = 2; - const cl_uchar MAX_VALUE = 255 / 2; - const std::string PROGRAM_STR = - "__kernel void TestFunction( read_only image2d_t planeIn, write_only image2d_t planeOut, " - NL " sampler_t sampler, __global int *planeRes)" - NL "{" - NL " int w = get_global_id(0);" - NL " int h = get_global_id(1);" - NL " int width = get_image_width(planeIn);" - NL " int height = get_image_height(planeOut);" - NL " float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + 0.2f;" - NL " float4 color1 = read_imagef(planeIn, sampler, (float2)(w,h)) + 0.2f;" - NL " color0 = (color0 == color1) ? 
color0: (float4)(0.5, 0.5, 0.5, 0.5);" - NL " write_imagef(planeOut, (int2)(w,h), color0);" - NL " if(w == 0 && h == 0)" - NL " {" - NL " planeRes[0] = width;" - NL " planeRes[1] = height;" - NL " }" - NL "}"; - - CResult result; - - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - std::vector > bufferIn(FRAME_NUM); - std::vector > bufferExp(FRAME_NUM); - size_t frameSize = width * height * 3 / 2; - cl_uchar step = MAX_VALUE / FRAME_NUM; - for (size_t i = 0; i < FRAME_NUM; ++i) - { - if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height, static_cast(step * i), static_cast(step * (i + 1))) || - !YUVGenerate(surfaceFormat, bufferExp[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.2)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - } - - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - void *objectSrcHandle = 0; - std::auto_ptr surfaceSrc; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc, - (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSrcHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - void *objectDstHandle = 0; - std::auto_ptr surfaceDst; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst, - (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - 0, - }; - - clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateContext failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfoSrc; - surfaceInfoSrc.resource = *(static_cast(surfaceSrc.get())); - surfaceInfoSrc.shared_handle = objectSrcHandle; - - cl_dx9_surface_info_khr surfaceInfoDst; - surfaceInfoDst.resource = *(static_cast(surfaceDst.get())); - surfaceInfoDst.shared_handle = objectDstHandle; -#else - void *surfaceInfoSrc = 0; - void *surfaceInfoDst = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjSrcList; - std::vector memObjDstList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planeSrcList(planesNum); - std::vector planeDstList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR 
failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjSrcList.push_back(planeSrcList[planeIdx]); - - planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjDstList.push_back(planeDstList[planeIdx]); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!ImageInfoVerify(adapterType, memObjSrcList, width, height, surfaceSrc, objectSrcHandle)) - { - log_error("Image info verification failed\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) - { - if (!YUVSurfaceSet(surfaceFormat, surfaceSrc, bufferIn[frameIdx % FRAME_NUM], width, height)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjDstList.size()), &memObjDstList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, 
CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); - if(error != CL_SUCCESS) - { - log_error("Unable to create sampler\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - clProgramWrapper program; - clKernelWrapper kernel; - const char *progPtr = PROGRAM_STR.c_str(); - if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) - result.ResultSub(CResult::TEST_FAIL); - - size_t bufferSize = sizeof(cl_int) * 2; - clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - size_t offset = 0; - size_t origin[3] = {0,0,0}; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjSrcList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - size_t threads[ 2 ] = { planeWidth, planeHeight }; - - error = clSetKernelArg( kernel, 0, sizeof( memObjSrcList[i] ), &memObjSrcList[i] ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 1, sizeof( memObjDstList[i] ), &memObjDstList[i] ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - size_t localThreads[ 2 ]; - error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads ); - if (error != CL_SUCCESS) - { - 
log_error("Unable to get work group size to use" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); - if (error != CL_SUCCESS) - { - log_error("Unable to execute test kernel" ); - result.ResultSub(CResult::TEST_FAIL); - } - - std::vector imageResOut(2, 0); - error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL ); - if (error != CL_SUCCESS) - { - log_error("Unable to read buffer"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(imageResOut[0] != planeWidth) - { - log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], planeWidth); - result.ResultSub(CResult::TEST_FAIL); - } - - if(imageResOut[1] != planeHeight) - { - log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], planeHeight); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, OCL objects are different than expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjDstList.size()), &memObjDstList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - 
result.ResultSub(CResult::TEST_FAIL); - } - - std::vector bufferOut(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut, bufferExp[frameIdx % FRAME_NUM], width, height)) - { - log_error("Frame idx: %i, media surface is different than expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - CResult result; - -#if defined(_WIN32) - //D3D9 - if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //D3D9EX - if(kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, 
- SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //DXVA - if(kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - -#else - return TEST_NOT_IMPLEMENTED; -#endif - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp 
deleted file mode 100644 index f8947ea63b..0000000000 --- a/test_extensions/media_sharing/test_get_device_ids.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "utils.h" - -int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, - cl_dx9_media_adapter_type_khr adapterType) -{ - CResult result; - - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_uint devicesExpectedNum = 0; - cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0, &devicesExpectedNum); - if (error != CL_SUCCESS || devicesExpectedNum < 1) - { - log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - std::vector devicesExpected(devicesExpectedNum); - error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, devicesExpectedNum, &devicesExpected[0], 0); - if (error != CL_SUCCESS) - { - log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - while (deviceWrapper->AdapterNext()) - { - std::vector mediaAdapterTypes; - mediaAdapterTypes.push_back(adapterType); - - std::vector mediaDevices; - mediaDevices.push_back(deviceWrapper->Device()); - - //check if 
the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result))) - { - return result.Result(); - } - - cl_uint devicesAllNum = 0; - error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], - CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) - { - log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - std::vector devicesAll; - if (devicesAllNum > 0) - { - devicesAll.resize(devicesAllNum); - error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], - CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum, &devicesAll[0], 0); - if (error != CL_SUCCESS) - { - log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - } - - cl_uint devicesPreferredNum = 0; - error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], - CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesPreferredNum); - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) - { - log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - std::vector devicesPreferred; - if (devicesPreferredNum > 0) - { - devicesPreferred.resize(devicesPreferredNum); - error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], - CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesPreferredNum, &devicesPreferred[0], 0); - if (error != CL_SUCCESS) - { - log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); - 
result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - } - - if (devicesAllNum < devicesPreferredNum) - { - log_error("Invalid number of preferred devices. It should be a subset of all devices\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - for (cl_uint i = 0; i < devicesPreferredNum; ++i) - { - cl_uint j = 0; - for (; j < devicesAllNum; ++j) - { - if (devicesPreferred[i] == devicesAll[j]) - break; - } - - if (j == devicesAllNum) - { - log_error("Preferred device is not a subset of all devices\n"); - result.ResultSub(CResult::TEST_FAIL); - } - } - - for (cl_uint i = 0; i < devicesAllNum; ++i) - { - cl_uint j = 0; - for (; j < devicesExpectedNum; ++j) - { - if (devicesAll[i] == devicesExpected[j]) - break; - } - - if (j == devicesExpectedNum) - { - log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a subset of all devices for selected platform\n"); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - CResult result; - -#if defined(_WIN32) - if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9_KHR) != 0) - { - log_error("\nTest case (D3D9) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9EX_KHR) != 0) - { - log_error("\nTest case (D3D9EX) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_DXVA_KHR) 
!= 0) - { - log_error("\nTest case (DXVA) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - -#else - return TEST_NOT_IMPLEMENTED; -#endif - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp deleted file mode 100644 index 6831a14da1..0000000000 --- a/test_extensions/media_sharing/test_interop_sync.cpp +++ /dev/null @@ -1,357 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "utils.h" - -int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, - int num_elements, unsigned int width, unsigned int height, - TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle, cl_bool userSync) -{ - CResult result; - - //create device - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - //generate input data - std::vector bufferIn(width * height * 3 / 2, 0); - if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - if (surfaceFormat != SURFACE_FORMAT_NV12 && - !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; - std::string syncStr = (userSync == CL_TRUE) ? "yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s, user sync: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str(), syncStr.c_str()); - return result.Result(); - } - - void *objectSharedHandle = 0; - std::auto_ptr surface; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, - (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSharedHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - CL_CONTEXT_INTEROP_USER_SYNC, userSync, - 0, - }; - - - clContextWrapper ctx; - switch(functionCreate) - { - case CONTEXT_CREATE_DEFAULT: - ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - break; - case CONTEXT_CREATE_FROM_TYPE: - ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error); - break; - default: - log_error("Unknown context creation function enum\n"); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - break; - } - - if (error != CL_SUCCESS) - { - std::string functionName; - FunctionContextCreateToString(functionCreate, functionName); - log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for 
plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) - { - log_error("Image info verification failed\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if (userSync == CL_TRUE) - { - #if defined(_WIN32) - IDirect3DQuery9* eventQuery = NULL; - switch (adapterType) - { - case CL_ADAPTER_D3D9_KHR: - { - LPDIRECT3DDEVICE9 device = (LPDIRECT3DDEVICE9)deviceWrapper->Device(); - device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); - eventQuery->Issue(D3DISSUE_END); - - while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) - ; - } - break; - case CL_ADAPTER_D3D9EX_KHR: - { - LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)deviceWrapper->Device(); - device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); - eventQuery->Issue(D3DISSUE_END); - - while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) - ; - } - break; - case CL_ADAPTER_DXVA_KHR: - { - CDXVAWrapper *DXVADevice = dynamic_cast(&(*deviceWrapper)); - LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device(); - device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); - eventQuery->Issue(D3DISSUE_END); - - while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) - ; - } - break; - default: - log_error("Unknown adapter type\n"); - return false; - break; - } - if(eventQuery) - { - eventQuery->Release(); - } -#else - return TEST_NOT_IMPLEMENTED; -#endif - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList.at(0), 0, 0, 0); - 
if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - size_t origin[3] = {0,0,0}; - size_t offset = 0; - size_t frameSize = width * height * 3 / 2; - std::vector out( frameSize, 0 ); - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) - { - log_error("OCL object verification failed - clEnqueueReadImage\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList.at(0), 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - if (userSync == CL_TRUE) - { - error = clFinish(cmdQueue); - if (error != CL_SUCCESS) - { - log_error("clFinish failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - //shared object verification - std::vector bufferOut(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) - { - log_error("Media surface is different than expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; 
- AdapterToString(adapterType, adapterName); - - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - const unsigned int WIDTH = 256; - const unsigned int HEIGHT = 256; - - std::vector adapters; -#if defined(_WIN32) - adapters.push_back(CL_ADAPTER_D3D9_KHR); - adapters.push_back(CL_ADAPTER_D3D9EX_KHR); - adapters.push_back(CL_ADAPTER_DXVA_KHR); -#else - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector contextFuncs; - contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); - contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); - - std::vector formats; - formats.push_back(SURFACE_FORMAT_NV12); - formats.push_back(SURFACE_FORMAT_YV12); - - std::vector sharedHandleTypes; - sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); - sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); - - std::vector sync; - sync.push_back(CL_FALSE); - sync.push_back(CL_TRUE); - - CResult result; - for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx) - { - //iteration through all create context functions - for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx) - { - //iteration through YUV formats - for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) - { - //shared handle enabled or disabled - for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx) - { - //user sync interop disabled or enabled - for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx) - { - if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED) - continue; - - 
if(interop_user_sync(deviceID, context, queue, num_elements, WIDTH, HEIGHT, - contextFuncs[contextFuncIdx], adapters[adapterIdx], formats[formatIdx], - sharedHandleTypes[sharedHandleIdx], sync[syncIdx]) != 0) - { - std::string syncStr = (sync[syncIdx] == CL_TRUE) ? "user sync enabled": "user sync disabled"; - std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? "shared handle": "no shared handle"; - std::string adapterStr; - std::string formatStr; - SurfaceFormatToString(formats[formatIdx], formatStr); - AdapterToString(adapters[adapterIdx], adapterStr); - - log_error("\nTest case - clCreateContext (%s, %s, %s, %s) failed\n\n", adapterStr.c_str(), formatStr.c_str(), sharedHandle.c_str(), syncStr.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - } - } - } - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp deleted file mode 100644 index 5aabaf6f0b..0000000000 --- a/test_extensions/media_sharing/test_memory_access.cpp +++ /dev/null @@ -1,468 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "utils.h" - -int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, - unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) -{ - CResult result; - - std::auto_ptr deviceWrapper; - //creates device - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - //generate input and expected data - size_t frameSize = width * height * 3 / 2; - std::vector bufferRef0(frameSize, 0); - std::vector bufferRef1(frameSize, 0); - std::vector bufferRef2(frameSize, 0); - if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90) || - !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180) || - !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - //iterates through all devices - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? 
"yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - void *objectSharedHandle = 0; - std::auto_ptr surface; - - //creates surface - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, - (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - 0, - }; - - clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateContext failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - { //memory access write -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; 
-#endif - - std::vector memObjList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for WRITE_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - size_t offset = 0; - size_t origin[3] = {0,0,0}; - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, - 0, 0, &bufferRef1[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - std::vector bufferOut0(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height)) - { - log_error("Media surface is different than expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - { //memory access read -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjList; - unsigned int planesNum = PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), 
&memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - std::vector out( frameSize, 0 ); - size_t offset = 0; - size_t origin[3] = {0,0,0}; - - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, - &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) - { - log_error("OCL image (READ_ONLY) is different then expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - std::vector bufferOut1(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height)) - { - log_error("Media surface is different than expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - { //memory access read write -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; - surfaceInfo.resource = *(static_cast(surface.get())); - surfaceInfo.shared_handle = objectSharedHandle; -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - std::vector memObjList; - unsigned int planesNum = 
PlanesNum(surfaceFormat); - std::vector planesList(planesNum); - for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) - { - planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_WRITE plane %i: %s\n", planeIdx, IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - memObjList.push_back(planesList[planeIdx]); - } - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - { //read - std::vector out( frameSize, 0 ); - size_t offset = 0; - size_t origin[3] = {0,0,0}; - - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, - &out[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - - if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) - { - log_error("OCL image (READ_WRITE) is different then expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //write - size_t offset = 0; - size_t origin[3] = {0,0,0}; - for (size_t i = 0; i < memObjList.size(); ++i) - { - size_t planeWidth = (i == 0) ? width: width / 2; - size_t planeHeight = (i == 0) ? 
height: height / 2; - size_t regionPlane[3] = {planeWidth, planeHeight, 1}; - - error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, - 0, 0, &bufferRef2[offset], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - offset += planeWidth * planeHeight; - } - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - std::vector bufferOut2(frameSize, 0); - if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height)) - { - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height)) - { - log_error("Media surface is different than expected\n"); - result.ResultSub(CResult::TEST_FAIL); - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - CResult result; - -#if defined(_WIN32) - //D3D9 - if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 
512, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //D3D9EX - if(memory_access(deviceID, context, queue, num_elements, 256, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 512, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //DXVA - if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 64, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 512, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, no shared 
handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(memory_access(deviceID, context, queue, num_elements, 1024, 1024, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - -#else - return TEST_NOT_IMPLEMENTED; -#endif - - return result.Result(); -} diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp deleted file mode 100644 index 8a73866e1e..0000000000 --- a/test_extensions/media_sharing/test_other_data_types.cpp +++ /dev/null @@ -1,1023 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include -#include - -#include "harness/errorHelpers.h" -#include "harness/imageHelpers.h" -#include "harness/kernelHelpers.h" - -#include "utils.h" - -template -int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, - unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, - TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) -{ - const unsigned int FRAME_NUM = 2; - const float MAX_VALUE = 0.6f; - const std::string PROGRAM_STR = - "__kernel void TestFunction( read_only image2d_t imageIn, write_only image2d_t imageOut, " - NL " sampler_t sampler, __global int *imageRes)" - NL "{" - NL " int w = get_global_id(0);" - NL " int h = get_global_id(1);" - NL " int width = get_image_width(imageIn);" - NL " int height = get_image_height(imageOut);" - NL " float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - 0.2f;" - NL " float4 color1 = read_imagef(imageIn, sampler, (float2)(w,h)) - 0.2f;" - NL " color0 = (color0 == color1) ? 
color0: (float4)(0.5, 0.5, 0.5, 0.5);" - NL " write_imagef(imageOut, (int2)(w,h), color0);" - NL " if(w == 0 && h == 0)" - NL " {" - NL " imageRes[0] = width;" - NL " imageRes[1] = height;" - NL " }" - NL "}"; - - CResult result; - - cl_image_format format; - if(!SurfaceFormatToOCL(surfaceFormat, format)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - std::auto_ptr deviceWrapper; - if (!DeviceCreate(adapterType, deviceWrapper)) - { - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - while (deviceWrapper->AdapterNext()) - { - cl_int error; - //check if the test can be run on the adapter - if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) - { - return result.Result(); - } - - cl_context_properties contextProperties[] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, - AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), - 0, - }; - - clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateContext failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); - if (error != CL_SUCCESS) - { - log_error("Unable to create command queue: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? 
"yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - if(!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format)) - { - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; - std::string formatStr; - std::string adapterStr; - SurfaceFormatToString(surfaceFormat, formatStr); - AdapterToString(adapterType, adapterStr); - log_info("Skipping test case, image format is not supported by OCL (adapter type: %s, format: %s, shared handle: %s)\n", - adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); - return result.Result(); - } - - if (format.image_channel_data_type == CL_HALF_FLOAT) - { - if (DetectFloatToHalfRoundingMode(cmdQueue)) - { - log_error("Unable to detect rounding mode\n"); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - } - - std::vector > bufferIn(FRAME_NUM); - std::vector > bufferExp(FRAME_NUM); - float step = MAX_VALUE / static_cast(FRAME_NUM); - unsigned int planeNum = ChannelNum(surfaceFormat); - for (size_t i = 0; i < FRAME_NUM; ++i) - { - DataGenerate(surfaceFormat, format.image_channel_data_type, bufferIn[i], width, height, planeNum, step * i, step * (i + 1)); - DataGenerate(surfaceFormat, format.image_channel_data_type, bufferExp[i], width, height, planeNum, step * i, step * (i + 1), 0.2f); - } - - void *objectSrcHandle = 0; - std::auto_ptr surfaceSrc; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc, - (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSrcHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - void *objectDstHandle = 0; - std::auto_ptr surfaceDst; - if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst, - (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle)) - { - log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceSrcInfo; - CD3D9SurfaceWrapper *dx9SurfaceSrc = (static_cast(surfaceSrc.get())); - surfaceSrcInfo.resource = *dx9SurfaceSrc; - surfaceSrcInfo.shared_handle = objectSrcHandle; - - cl_dx9_surface_info_khr surfaceDstInfo; - CD3D9SurfaceWrapper *dx9SurfaceDst = (static_cast(surfaceDst.get())); - surfaceDstInfo.resource = *dx9SurfaceDst; - surfaceDstInfo.shared_handle = objectDstHandle; -#else - void *surfaceSrcInfo = 0; - void *surfaceDstInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - //create OCL shared object - clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - std::vector memObjList; - memObjList.push_back(objectSrcShared); - memObjList.push_back(objectDstShared); - - if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc, objectSrcHandle)) - 
{ - log_error("Invalid memory object info\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum, - width * sizeof(T) * planeNum, 0, width, height, 0, 0)) - { - log_error("clGetImageInfo failed\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) - { - //surface set -#if defined(_WIN32) - D3DLOCKED_RECT rect; - if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0))) - { - log_error("Surface lock failed\n"); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - size_t pitch = rect.Pitch / sizeof(T); - size_t lineSize = width * planeNum * sizeof(T); - T *ptr = static_cast(rect.pBits); - - for (size_t y = 0; y < height; ++y) - memcpy(ptr + y * pitch, &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum], lineSize); - - (*dx9SurfaceSrc)->UnlockRect(); -#else - void *surfaceInfo = 0; - return TEST_NOT_IMPLEMENTED; -#endif - - error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); - } - - size_t origin[3] = {0,0,0}; - size_t region[3] = {width, height, 1}; - - { //read operation - std::vector out( planeNum * width * height, 0 ); - error = clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0, &out[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) - { - log_error("Frame idx: %i, OCL object is different then expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //write operation - error = 
clEnqueueWriteImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, - 0, 0, &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //kernel operations - clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); - if(error != CL_SUCCESS) - { - log_error("Unable to create sampler\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - size_t threads[ 2 ] = { width, height }; - clProgramWrapper program; - clKernelWrapper kernel; - const char *progPtr = PROGRAM_STR.c_str(); - if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) - result.ResultSub(CResult::TEST_FAIL); - - error = clSetKernelArg( kernel, 0, sizeof( objectSrcShared ), &(objectSrcShared) ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 1, sizeof( objectDstShared ), &(objectDstShared) ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler ); - if (error != CL_SUCCESS) - { - log_error("Unable to set kernel arguments" ); - result.ResultSub(CResult::TEST_FAIL); - } - - size_t bufferSize = sizeof(cl_int) * 2; - clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); - if (error != CL_SUCCESS) - { - log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); - - size_t localThreads[ 2 ]; - error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads ); - if (error != CL_SUCCESS) - { - log_error("Unable to get work group size to use" ); - 
result.ResultSub(CResult::TEST_FAIL); - } - - error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); - if (error != CL_SUCCESS) - { - log_error("Unable to execute test kernel" ); - result.ResultSub(CResult::TEST_FAIL); - } - - std::vector imageResOut(2, 0); - error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL ); - if (error != CL_SUCCESS) - { - log_error("Unable to read buffer"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(imageResOut[0] != width) - { - log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], width); - result.ResultSub(CResult::TEST_FAIL); - } - - if(imageResOut[1] != height) - { - log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], height); - result.ResultSub(CResult::TEST_FAIL); - } - } - - { //map operation - size_t mapOrigin[3] = {0,0,0}; - size_t mapRegion[3] = {width, height, 1}; - - std::vector out( width * height * planeNum, 0 ); - size_t rowPitch = 0; - size_t slicePitch = 0; - void *mapPtr = clEnqueueMapImage(cmdQueue, objectDstShared, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion, - &rowPitch, &slicePitch, 0, 0, 0, &error); - if (error != CL_SUCCESS) - { - log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t y = 0; y < height; ++y) - memcpy(&out[y * width * planeNum], static_cast(mapPtr) + y * rowPitch / sizeof(T), - width * planeNum * sizeof(T)); - - if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) - { - log_error("Frame idx: %i, Mapped OCL object is different then expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - - for (size_t y = 0; y < height; ++y) - memcpy(static_cast(mapPtr) + y * rowPitch / sizeof(T), &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum], - width * planeNum * sizeof(T)); - - error = 
clEnqueueUnmapMemObject(cmdQueue, objectDstShared, mapPtr, 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - } - - error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); - if (error != CL_SUCCESS) - { - log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n", IGetErrorString(error)); - result.ResultSub(CResult::TEST_FAIL); - } - - std::vector out(width * height * planeNum, 0); - //surface get -#if defined(_WIN32) - if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0))) - { - log_error("Surface lock failed\n"); - result.ResultSub(CResult::TEST_ERROR); - return result.Result(); - } - - pitch = rect.Pitch / sizeof(T); - lineSize = width * planeNum * sizeof(T); - ptr = static_cast(rect.pBits); - for (size_t y = 0; y < height; ++y) - memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize); - - (*dx9SurfaceDst)->UnlockRect(); -#else - return TEST_NOT_IMPLEMENTED; -#endif - - if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferExp[frameIdx % FRAME_NUM], width, height, planeNum)) - { - log_error("Frame idx: %i, media object is different then expected\n", frameIdx); - result.ResultSub(CResult::TEST_FAIL); - } - } - } - - if (deviceWrapper->Status() != DEVICE_PASS) - { - std::string adapterName; - AdapterToString(adapterType, adapterName); - if (deviceWrapper->Status() == DEVICE_FAIL) - { - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - } - else - { - log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return result.Result(); -} - -int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - CResult result; - -#if defined(_WIN32) - //D3D9 - if(other_data_types(deviceID, context, queue, 
num_elements, 10, 64, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) - { - 
log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, A8R8G8B8, 
no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, - SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //D3D9EX - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - 
if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, 
CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR, - 
SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR, - 
SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, - SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - //DXVA - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) - { - 
log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, L16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, L8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, 
context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8L8, 
SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8B8G8R8, 
SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) - { - log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - - if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, - SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0) - { - log_error("\nTest case (DXVA, 
X8R8G8B8, shared handle) failed\n\n"); - result.ResultSub(CResult::TEST_FAIL); - } - -#else - return TEST_NOT_IMPLEMENTED; -#endif - - return result.Result(); -} diff --git a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp deleted file mode 100644 index 3129643203..0000000000 --- a/test_extensions/media_sharing/utils.cpp +++ /dev/null @@ -1,1595 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "utils.h" - -#include "harness/errorHelpers.h" -#include "harness/imageHelpers.h" -#include "harness/rounding_mode.h" - -#include - -#include - -static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode; - - -CResult::CResult(): -_result(TEST_PASS), _resultLast(TEST_NORESULT) -{ - -} - -CResult::~CResult() -{ - -} - -CResult::TTestResult CResult::ResultLast() const -{ - return _resultLast; -} - -int CResult::Result() const -{ - switch (_result) - { - case TEST_NORESULT: - case TEST_NOTSUPPORTED: - case TEST_PASS: - return 0; - break; - case TEST_FAIL: - return 1; - break; - case TEST_ERROR: - return 2; - break; - default: - return -1; - break; - } -} - -void CResult::ResultSub( TTestResult result ) -{ - _resultLast = result; - if (static_cast(result) > static_cast(_result)) - _result = result; -} - -void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction) -{ - switch(contextCreateFunction) - { - case CONTEXT_CREATE_DEFAULT: - contextFunction = "CreateContext"; - break; - case CONTEXT_CREATE_FROM_TYPE: - contextFunction = "CreateContextFromType"; - break; - default: - contextFunction = "Unknown"; - log_error("FunctionContextCreateToString(): Unknown create function enum!"); - break; - } -} - -void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter) -{ - switch(adapterType) - { - case CL_ADAPTER_D3D9_KHR: - adapter = "D3D9"; - break; - case CL_ADAPTER_D3D9EX_KHR: - adapter = "D3D9EX"; - break; - case CL_ADAPTER_DXVA_KHR: - adapter = "DXVA"; - break; - default: - adapter = "Unknown"; - log_error("AdapterToString(): Unknown adapter type!"); - break; - } -} - -cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType ) -{ - switch (adapterType) - { - case CL_ADAPTER_D3D9_KHR: - return CL_CONTEXT_ADAPTER_D3D9_KHR; - break; - case CL_ADAPTER_D3D9EX_KHR: - return CL_CONTEXT_ADAPTER_D3D9EX_KHR; - break; - case CL_ADAPTER_DXVA_KHR: - return 
CL_CONTEXT_ADAPTER_DXVA_KHR; - break; - default: - log_error("AdapterTypeToContextInfo(): Unknown adapter type!"); - return 0; - break; - } -} - -void YUVGenerateNV12( std::vector &yuv, unsigned int width, unsigned int height, - cl_uchar valueMin, cl_uchar valueMax, double valueAdd ) -{ - yuv.clear(); - yuv.resize(width * height * 3 / 2, 0); - - double min = static_cast(valueMin); - double max = static_cast(valueMax); - double range = 255; - double add = static_cast(valueAdd * range); - double stepX = (max - min) / static_cast(width); - double stepY = (max - min) /static_cast(height); - - //generate Y plane - for (unsigned int i = 0; i < height; ++i) - { - unsigned int offset = i * width; - double valueYPlane0 = static_cast(stepY * i); - for (unsigned int j = 0; j < width; ++j) - { - double valueXPlane0 = static_cast(stepX * j); - yuv.at(offset + j) = static_cast(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); - } - } - - //generate UV planes - for (unsigned int i = 0; i < height / 2; ++i) - { - unsigned int offset = width * height + i * width; - double valueYPlane1 = static_cast(stepY * i); - double valueYPlane2 = static_cast(stepY * (height / 2 + i)); - for (unsigned int j = 0; j < width / 2; ++j) - { - double valueXPlane1 = static_cast(stepX * j); - double valueXPlane2 = static_cast(stepX * (width / 2 + j)); - - yuv.at(offset + j * 2) = static_cast(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); - yuv.at(offset + j * 2 + 1) = static_cast(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); - } - } -} - -void YUVGenerateYV12( std::vector &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) -{ - yuv.clear(); - yuv.resize(width * height * 3 / 2, 0); - - double min = static_cast(valueMin); - double max = static_cast(valueMax); - double range = 255; - double add = static_cast(valueAdd * range); - double stepX = (max - min) / static_cast(width); - double stepY = (max - min) /static_cast(height); - - unsigned 
offset = 0; - - //generate Y plane - for (unsigned int i = 0; i < height; ++i) - { - unsigned int plane0Offset = offset + i * width; - double valueYPlane0 = static_cast(stepY * i); - for (unsigned int j = 0; j < width; ++j) - { - double valueXPlane0 = static_cast(stepX * j); - yuv.at(plane0Offset + j) = static_cast(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); - } - } - - //generate V plane - offset += width * height; - for (unsigned int i = 0; i < height / 2; ++i) - { - unsigned int plane1Offset = offset + i * width / 2; - double valueYPlane1 = static_cast(stepY * i); - for (unsigned int j = 0; j < width / 2; ++j) - { - double valueXPlane1 = static_cast(stepX * j); - yuv.at(plane1Offset + j) = static_cast(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); - } - } - - //generate U plane - offset += width * height / 4; - for (unsigned int i = 0; i < height / 2; ++i) - { - unsigned int plane2Offset = offset + i * width / 2; - double valueYPlane2 = static_cast(stepY * (height / 2 + i)); - for (unsigned int j = 0; j < width / 2; ++j) - { - double valueXPlane2 = static_cast(stepX * j); - yuv.at(plane2Offset + j) = static_cast(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); - } - } -} - - -bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) -{ - switch (surfaceFormat) - { - case SURFACE_FORMAT_NV12: - YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd); - break; - case SURFACE_FORMAT_YV12: - YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd); - break; - default: - log_error("YUVGenerate(): Invalid surface type\n"); - return false; - break; - } - - return true; -} - -bool YUVSurfaceSetNV12( std::auto_ptr &surface, const std::vector &yuv, - unsigned int width, unsigned int height ) -{ -#if defined(_WIN32) - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - D3DLOCKED_RECT rect; - if 
(FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) - { - log_error("YUVSurfaceSetNV12(): Surface lock failed\n"); - return false; - } - - size_t pitch = rect.Pitch / sizeof(cl_uchar); - size_t lineSize = width * sizeof(cl_uchar); - cl_uchar *ptr = static_cast(rect.pBits); - for (size_t y = 0; y < height; ++y) - memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize); - - for (size_t y = 0; y < height / 2; ++y) - memcpy(ptr + height * pitch + y * pitch, &yuv.at(width * height + y * width), lineSize); - - (*d3dSurface)->UnlockRect(); - - return true; - -#else - return false; -#endif -} - -bool YUVSurfaceSetYV12( std::auto_ptr &surface, const std::vector &yuv, - unsigned int width, unsigned int height ) -{ -#if defined(_WIN32) - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - D3DLOCKED_RECT rect; - if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) - { - log_error("YUVSurfaceSetYV12(): Surface lock failed!\n"); - return false; - } - - size_t pitch = rect.Pitch / sizeof(cl_uchar); - size_t pitchHalf = pitch / 2; - size_t lineSize = width * sizeof(cl_uchar); - size_t lineHalfSize = lineSize / 2; - size_t surfaceOffset = 0; - size_t yuvOffset = 0; - cl_uchar *ptr = static_cast(rect.pBits); - - for (size_t y = 0; y < height; ++y) - memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width), lineSize); - - surfaceOffset += height * pitch; - yuvOffset += width * height; - for (size_t y = 0; y < height / 2; ++y) - memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); - - surfaceOffset += pitchHalf * height / 2; - yuvOffset += width * height / 4; - for (size_t y = 0; y < height / 2; ++y) - memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); - - (*d3dSurface)->UnlockRect(); - - return true; - -#else - return false; -#endif -} - -bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, const std::vector &yuv, unsigned int width, unsigned int 
height ) -{ - switch (surfaceFormat) - { - case SURFACE_FORMAT_NV12: - if(!YUVSurfaceSetNV12(surface, yuv, width, height)) - return false; - break; - case SURFACE_FORMAT_YV12: - if(!YUVSurfaceSetYV12(surface, yuv, width, height)) - return false; - break; - default: - log_error("YUVSurfaceSet(): Invalid surface type!\n"); - return false; - break; - } - - return true; -} - -bool YUVSurfaceGetNV12( std::auto_ptr &surface, std::vector &yuv, - unsigned int width, unsigned int height ) -{ -#if defined(_WIN32) - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - D3DLOCKED_RECT rect; - if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) - { - log_error("YUVSurfaceGetNV12(): Surface lock failed!\n"); - return false; - } - - size_t pitch = rect.Pitch / sizeof(cl_uchar); - size_t lineSize = width * sizeof(cl_uchar); - cl_uchar *ptr = static_cast(rect.pBits); - size_t yuvOffset = 0; - size_t surfaceOffset = 0; - for (size_t y = 0; y < height; ++y) - memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize); - - yuvOffset += width * height; - surfaceOffset += pitch * height; - for (size_t y = 0; y < height / 2; ++y) - memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize); - - (*d3dSurface)->UnlockRect(); - - return true; - -#else - return false; -#endif -} - -bool YUVSurfaceGetYV12( std::auto_ptr &surface, std::vector &yuv, unsigned int width, unsigned int height ) -{ -#if defined(_WIN32) - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - D3DLOCKED_RECT rect; - if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) - { - log_error("YUVSurfaceGetYV12(): Surface lock failed!\n"); - return false; - } - - size_t pitch = rect.Pitch / sizeof(cl_uchar); - size_t pitchHalf = pitch / 2; - size_t lineSize = width * sizeof(cl_uchar); - size_t lineHalfSize = lineSize / 2; - size_t surfaceOffset = 0; - size_t yuvOffset = 0; - cl_uchar *ptr = static_cast(rect.pBits); - - for (size_t y = 0; y < height; ++y) - 
memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize); - - surfaceOffset += pitch * height; - yuvOffset += width * height; - for (size_t y = 0; y < height / 2; ++y) - memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize); - - surfaceOffset += pitchHalf * height / 2; - yuvOffset += width * height / 4; - for (size_t y = 0; y < height / 2; ++y) - memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize); - - (*d3dSurface)->UnlockRect(); - - return true; - -#else - return false; -#endif -} - -bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, std::vector &yuv, - unsigned int width, unsigned int height ) -{ - switch (surfaceFormat) - { - case SURFACE_FORMAT_NV12: - if(!YUVSurfaceGetNV12(surface, yuv, width, height)) - return false; - break; - case SURFACE_FORMAT_YV12: - if(!YUVSurfaceGetYV12(surface, yuv, width, height)) - return false; - break; - default: - log_error("YUVSurfaceGet(): Invalid surface type!\n"); - return false; - break; - } - - return true; -} - -bool YUVCompareNV12( const std::vector &yuvTest, const std::vector &yuvRef, - unsigned int width, unsigned int height ) -{ - //plane 0 verification - size_t offset = 0; - for (size_t y = 0; y < height; ++y) - { - size_t plane0Offset = offset + width * y; - for (size_t x = 0; x < width; ++x) - { - if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x]) - { - log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x, y); - return false; - } - } - } - - //plane 1 and 2 verification - offset += width * height; - for (size_t y = 0; y < height / 2; ++y) - { - size_t plane12Offset = offset + width * y; - for (size_t x = 0; x < width / 2; ++x) - { - if (yuvTest.at(plane12Offset + 2 * x) != yuvRef.at(plane12Offset + 2 * x)) - { - log_error("Plane 1 (U) is different than expected, 
reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], x, y); - return false; - } - - if (yuvTest.at(plane12Offset + 2 * x + 1) != yuvRef.at(plane12Offset + 2 * x + 1)) - { - log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], x, y); - return false; - } - } - } - - return true; -} - -bool YUVCompareYV12( const std::vector &yuvTest, const std::vector &yuvRef, - unsigned int width, unsigned int height ) -{ - //plane 0 verification - size_t offset = 0; - for (size_t y = 0; y < height; ++y) - { - size_t plane0Offset = width * y; - for (size_t x = 0; x < width; ++x) - { - if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x)) - { - log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x ,y); - return false; - } - } - } - - //plane 1 verification - offset += width * height; - for (size_t y = 0; y < height / 2; ++y) - { - size_t plane1Offset = offset + width * y / 2; - for (size_t x = 0; x < width / 2; ++x) - { - if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x)) - { - log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], x, y); - return false; - } - } - } - - //plane 2 verification - offset += width * height / 4; - for (size_t y = 0; y < height / 2; ++y) - { - size_t plane2Offset = offset + width * y / 2; - for (size_t x = 0; x < width / 2; ++x) - { - if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x)) - { - log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", - yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], x, y); - return false; - } - } - } - - return true; -} - -bool YUVCompare( 
TSurfaceFormat surfaceFormat, const std::vector &yuvTest, const std::vector &yuvRef, - unsigned int width, unsigned int height ) -{ - switch (surfaceFormat) - { - case SURFACE_FORMAT_NV12: - if (!YUVCompareNV12(yuvTest, yuvRef, width, height)) - { - log_error("OCL object is different than expected!\n"); - return false; - } - break; - case SURFACE_FORMAT_YV12: - if (!YUVCompareYV12(yuvTest, yuvRef, width, height)) - { - log_error("OCL object is different than expected!\n"); - return false; - } - break; - default: - log_error("YUVCompare(): Invalid surface type!\n"); - return false; - break; - } - - return true; -} - -void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) -{ - data.clear(); - data.reserve(width * height * channelNum); - - double valueMin = static_cast(cmin); - double valueMax = static_cast(cmax); - double stepX = (valueMax - valueMin) / static_cast(width); - double stepY = (valueMax - valueMin) /static_cast(height); - double valueAdd = static_cast(add); - for (unsigned int i = 0; i < height; ++i) - { - double valueY = static_cast(stepY * i); - for (unsigned int j = 0; j < width; ++j) - { - double valueX = static_cast(stepX * j); - switch (channelNum) - { - case 1: - data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); - break; - case 2: - data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - break; - case 4: - data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); - data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); - break; - default: - log_error("DataGenerate(): invalid channel number!"); - return; - break; - } - } - } -} - -void DataGenerate( 
TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) -{ - data.clear(); - data.reserve(width * height * channelNum); - - double valueMin = static_cast(cmin); - double valueMax = static_cast(cmax); - double stepX = (valueMax - valueMin) / static_cast(width); - double stepY = (valueMax - valueMin) /static_cast(height); - - switch(type) - { - case CL_HALF_FLOAT: - { - double valueAdd = static_cast(add); - - for (unsigned int i = 0; i < height; ++i) - { - double valueY = static_cast(stepY * i); - for (unsigned int j = 0; j < width; ++j) - { - double valueX = static_cast(stepX * j); - switch (channelNum) - { - case 1: - data.push_back(convert_float_to_half(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd))); - break; - case 2: - data.push_back(convert_float_to_half(static_cast(valueMin + valueX + valueAdd))); - data.push_back(convert_float_to_half(static_cast(valueMin + valueY + valueAdd))); - break; - case 4: - data.push_back(convert_float_to_half(static_cast(valueMin + valueX + valueAdd))); - data.push_back(convert_float_to_half(static_cast(valueMin + valueY + valueAdd))); - data.push_back(convert_float_to_half(static_cast(valueMin + valueX / 2 + valueAdd))); - data.push_back(convert_float_to_half(static_cast(valueMin + valueY / 2 + valueAdd))); - break; - default: - log_error("DataGenerate(): invalid channel number!"); - return; - break; - } - } - } - break; - } - case CL_UNORM_INT16: - { - double range = 65535; - double valueAdd = static_cast(add * range); - - for (unsigned int i = 0; i < height; ++i) - { - double valueY = static_cast(stepY * i * range); - for (unsigned int j = 0; j < width; ++j) - { - double valueX = static_cast(stepX * j * range); - switch (channelNum) - { - case 1: - data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); - break; - case 2: - 
data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - break; - case 4: - data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); - data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); - break; - default: - log_error("DataGenerate(): invalid channel number!"); - return; - break; - } - } - } - } - break; - default: - log_error("DataGenerate(): unknown data type!"); - return; - break; - } -} - -void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) -{ - data.clear(); - data.reserve(width * height * channelNum); - - double valueMin = static_cast(cmin); - double valueMax = static_cast(cmax); - double stepX = (valueMax - valueMin) / static_cast(width); - double stepY = (valueMax - valueMin) /static_cast(height); - - double range = 255; - double valueAdd = static_cast(add * range); - - for (unsigned int i = 0; i < height; ++i) - { - double valueY = static_cast(stepY * i * range); - for (unsigned int j = 0; j < width; ++j) - { - double valueX = static_cast(stepX * j * range); - switch (channelNum) - { - case 1: - data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); - break; - case 2: - data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - break; - case 4: - data.push_back(static_cast(valueMin + valueX + valueAdd)); - data.push_back(static_cast(valueMin + valueY + valueAdd)); - data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); - if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8) - data.push_back(static_cast(0xff)); - else - data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); - break; - default: 
- log_error("DataGenerate(): invalid channel number!"); - return; - break; - } - } - } -} - -bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int channelNum) -{ - float epsilon = 0.000001f; - for (unsigned int i = 0; i < height; ++i) - { - unsigned int offset = i * width * channelNum; - for (unsigned int j = 0; j < width; ++j) - { - for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) - { - if (abs(dataTest.at(offset + j * channelNum + planeIdx) - dataExp.at(offset + j * channelNum + planeIdx)) > epsilon) - { - log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n", - j, i, planeIdx, dataTest[offset + j * channelNum + planeIdx], dataExp[offset + j * channelNum + planeIdx]); - return false; - } - } - } - } - - return true; -} - -bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int channelNum) -{ - switch(type) - { - case CL_HALF_FLOAT: - { - float epsilon = 0.001f; - for (unsigned int i = 0; i < height; ++i) - { - unsigned int offset = i * width * channelNum; - for (unsigned int j = 0; j < width; ++j) - { - for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) - { - float test = cl_half_to_float( - dataTest.at(offset + j * channelNum + planeIdx)); - float ref = cl_half_to_float( - dataExp.at(offset + j * channelNum + planeIdx)); - if (abs(test - ref) > epsilon) - { - log_error( - "Tested image is different than reference (x,y,plane) = " - "(%i,%i,%i), test value = %f, expected value = %f\n", - j, i, planeIdx, test, ref); - return false; - } - } - } - } - } - break; - case CL_UNORM_INT16: - { - cl_ushort epsilon = 1; - for (unsigned int i = 0; i < height; ++i) - { - unsigned int offset = i * width * channelNum; - for (unsigned int j 
= 0; j < width; ++j) - { - for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) - { - cl_ushort test = dataTest.at(offset + j * channelNum + planeIdx); - cl_ushort ref = dataExp.at(offset + j * channelNum + planeIdx); - if (abs(test - ref) > epsilon) - { - log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref); - return false; - } - } - } - } - } - break; - default: - log_error("DataCompare(): Invalid data format!"); - return false; - break; - } - - return true; -} - -bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int planeNum ) -{ - for (unsigned int i = 0; i < height; ++i) - { - unsigned int offset = i * width * planeNum; - for (unsigned int j = 0; j < width; ++j) - { - for(unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx) - { - if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3) - continue; - - cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx); - cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx); - if (test != ref) - { - log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", - j, i, planeIdx, test, ref); - return false; - } - } - } - } - - return true; -} - -bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp, - size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp) -{ - bool result = true; - - cl_image_format format; - if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n"); - result = false; - } - - if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type) - { - 
log_error("Value of CL_IMAGE_FORMAT is different than expected\n"); - result = false; - } - - size_t elementSize = 0; - if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n"); - result = false; - } - - if (elementSizeExp != elementSize) - { - log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %i, exp size: %i)\n", elementSize, elementSizeExp); - result = false; - } - - size_t rowPitch = 0; - if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n"); - result = false; - } - - if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch)) - { - log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %i, exp size: %i)\n", rowPitch, rowPitchExp); - result = false; - } - - size_t slicePitch = 0; - if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n"); - result = false; - } - - if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch)) - { - log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %i, exp size: %i)\n", slicePitch, slicePitchExp); - result = false; - } - - size_t width = 0; - if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n"); - result = false; - } - - if (widthExp != width) - { - log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %i, exp size: %i)\n", width, widthExp); - result = false; - } - - size_t height = 0; - if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n"); - result = false; - } - - if 
(heightExp != height) - { - log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %i, exp size: %i)\n", height, heightExp); - result = false; - } - - size_t depth = 0; - if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n"); - result = false; - } - - if (depthExp != depth) - { - log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %i, exp size: %i)\n", depth, depthExp); - result = false; - } - - unsigned int plane = 99; - size_t paramSize = 0; - if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, ¶mSize) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n"); - result = false; - } - - if (planeExp != plane) - { - log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than expected (plane: %i, exp plane: %i)\n", plane, planeExp); - result = false; - } - - return result; -} - -bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &surface, void *shareHandleExp ) -{ - bool result = true; - switch(adapterType) - { - case CL_ADAPTER_D3D9_KHR: - case CL_ADAPTER_D3D9EX_KHR: - case CL_ADAPTER_DXVA_KHR: - { -#if defined(_WIN32) - cl_dx9_surface_info_khr surfaceInfo; -#else - void *surfaceInfo = 0; - return false; -#endif - size_t paramSize = 0; - if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, ¶mSize) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n"); - result = false; - } - -#if defined(_WIN32) - CD3D9SurfaceWrapper *d3d9Surface = static_cast(surface.get()); - if (*d3d9Surface != surfaceInfo.resource) - { - log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); - result = false; - } - - if (shareHandleExp != surfaceInfo.shared_handle) - { - log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); - result = 
false; - } -#else - return false; -#endif - - if (paramSize != sizeof(surfaceInfo)) - { - log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(surfaceInfo)); - result = false; - } - - paramSize = 0; - cl_dx9_media_adapter_type_khr mediaAdapterType; - if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, ¶mSize) != CL_SUCCESS) - { - log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n"); - result = false; - } - - if (adapterType != mediaAdapterType) - { - log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n"); - result = false; - } - - if (paramSize != sizeof(mediaAdapterType)) - { - log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(mediaAdapterType)); - result = false; - } - } - break; - default: - log_error("GetMemObjInfo(): Unknown adapter type!\n"); - return false; - break; - } - - return result; -} - -bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector &memObjList, unsigned int width, unsigned int height, - std::auto_ptr &surface, void *sharedHandle) -{ - if (memObjList.size() != 2 && memObjList.size() != 3) - { - log_error("ImageInfoVerify(): Invalid object list parameter\n"); - return false; - } - - cl_image_format formatPlane; - formatPlane.image_channel_data_type = CL_UNORM_INT8; - formatPlane.image_channel_order = CL_R; - - //plane 0 verification - if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar), - width * sizeof(cl_uchar), - 0, - width, height, 0, 0)) - { - log_error("clGetImageInfo failed\n"); - return false; - } - - switch (memObjList.size()) - { - case 2: - { - formatPlane.image_channel_data_type = CL_UNORM_INT8; - formatPlane.image_channel_order = CL_RG; - if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2, - width * sizeof(cl_uchar), - 0, - width / 2, height / 2, 0, 1)) - { - 
log_error("clGetImageInfo failed\n"); - return false; - } - } - break; - case 3: - { - if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar), - width * sizeof(cl_uchar) / 2, - 0, - width / 2, height / 2, 0, 1)) - { - log_error("clGetImageInfo failed\n"); - return false; - } - - if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar), - width * sizeof(cl_uchar) / 2, - 0, - width / 2, height / 2, 0, 2)) - { - log_error("clGetImageInfo failed\n"); - return false; - } - } - break; - default: - log_error("ImageInfoVerify(): Invalid object list parameter\n"); - return false; - break; - } - - for (size_t i = 0; i < memObjList.size(); ++i) - { - if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle)) - { - log_error("clGetMemObjInfo(%i) failed\n", i); - return false; - } - } - - return true; -} - -bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck) -{ - cl_uint imageFormatsNum = 0; - cl_int error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum); - if(error != CL_SUCCESS) - { - log_error("clGetSupportedImageFormats failed\n"); - return false; - } - - if(imageFormatsNum < 1) - { - log_error("Invalid image format number returned by clGetSupportedImageFormats\n"); - return false; - } - - std::vector imageFormats(imageFormatsNum); - error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, imageFormatsNum, &imageFormats[0], 0); - if(error != CL_SUCCESS) - { - log_error("clGetSupportedImageFormats failed\n"); - return false; - } - - for(cl_uint i = 0; i < imageFormatsNum; ++i) - { - if(imageFormats[i].image_channel_data_type == imageFormatCheck.image_channel_data_type - && imageFormats[i].image_channel_order == imageFormatCheck.image_channel_order) - { - return true; - } - } - - return false; -} - -unsigned int ChannelNum( TSurfaceFormat surfaceFormat ) -{ - switch(surfaceFormat) - { - case SURFACE_FORMAT_R32F: - case 
SURFACE_FORMAT_R16F: - case SURFACE_FORMAT_L16: - case SURFACE_FORMAT_A8: - case SURFACE_FORMAT_L8: - return 1; - break; - case SURFACE_FORMAT_G32R32F: - case SURFACE_FORMAT_G16R16F: - case SURFACE_FORMAT_G16R16: - case SURFACE_FORMAT_A8L8: - return 2; - break; - case SURFACE_FORMAT_NV12: - case SURFACE_FORMAT_YV12: - return 3; - break; - case SURFACE_FORMAT_A32B32G32R32F: - case SURFACE_FORMAT_A16B16G16R16F: - case SURFACE_FORMAT_A16B16G16R16: - case SURFACE_FORMAT_A8B8G8R8: - case SURFACE_FORMAT_X8B8G8R8: - case SURFACE_FORMAT_A8R8G8B8: - case SURFACE_FORMAT_X8R8G8B8: - return 4; - break; - default: - log_error("ChannelNum(): unknown surface format!\n"); - return 0; - break; - } -} - -unsigned int PlanesNum( TSurfaceFormat surfaceFormat ) -{ - switch(surfaceFormat) - { - case SURFACE_FORMAT_R32F: - case SURFACE_FORMAT_R16F: - case SURFACE_FORMAT_L16: - case SURFACE_FORMAT_A8: - case SURFACE_FORMAT_L8: - case SURFACE_FORMAT_G32R32F: - case SURFACE_FORMAT_G16R16F: - case SURFACE_FORMAT_G16R16: - case SURFACE_FORMAT_A8L8: - case SURFACE_FORMAT_A32B32G32R32F: - case SURFACE_FORMAT_A16B16G16R16F: - case SURFACE_FORMAT_A16B16G16R16: - case SURFACE_FORMAT_A8B8G8R8: - case SURFACE_FORMAT_X8B8G8R8: - case SURFACE_FORMAT_A8R8G8B8: - case SURFACE_FORMAT_X8R8G8B8: - return 1; - break; - case SURFACE_FORMAT_NV12: - return 2; - break; - case SURFACE_FORMAT_YV12: - return 3; - break; - default: - log_error("PlanesNum(): unknown surface format!\n"); - return 0; - break; - } -} - -#if defined(_WIN32) -D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat) -{ - switch(surfaceFormat) - { - case SURFACE_FORMAT_R32F: - return D3DFMT_R32F; - break; - case SURFACE_FORMAT_R16F: - return D3DFMT_R16F; - break; - case SURFACE_FORMAT_L16: - return D3DFMT_L16; - break; - case SURFACE_FORMAT_A8: - return D3DFMT_A8; - break; - case SURFACE_FORMAT_L8: - return D3DFMT_L8; - break; - case SURFACE_FORMAT_G32R32F: - return D3DFMT_G32R32F; - break; - case SURFACE_FORMAT_G16R16F: - return 
D3DFMT_G16R16F; - break; - case SURFACE_FORMAT_G16R16: - return D3DFMT_G16R16; - break; - case SURFACE_FORMAT_A8L8: - return D3DFMT_A8L8; - break; - case SURFACE_FORMAT_A32B32G32R32F: - return D3DFMT_A32B32G32R32F; - break; - case SURFACE_FORMAT_A16B16G16R16F: - return D3DFMT_A16B16G16R16F; - break; - case SURFACE_FORMAT_A16B16G16R16: - return D3DFMT_A16B16G16R16; - break; - case SURFACE_FORMAT_A8B8G8R8: - return D3DFMT_A8B8G8R8; - break; - case SURFACE_FORMAT_X8B8G8R8: - return D3DFMT_X8B8G8R8; - break; - case SURFACE_FORMAT_A8R8G8B8: - return D3DFMT_A8R8G8B8; - break; - case SURFACE_FORMAT_X8R8G8B8: - return D3DFMT_X8R8G8B8; - break; - case SURFACE_FORMAT_NV12: - return static_cast(MAKEFOURCC('N', 'V', '1', '2')); - break; - case SURFACE_FORMAT_YV12: - return static_cast(MAKEFOURCC('Y', 'V', '1', '2')); - break; - default: - log_error("SurfaceFormatToD3D(): unknown surface format!\n"); - return D3DFMT_R32F; - break; - } -} -#endif - -bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &device ) -{ - switch (adapterType) - { -#if defined(_WIN32) - case CL_ADAPTER_D3D9_KHR: - device = std::auto_ptr(new CD3D9Wrapper()); - break; - case CL_ADAPTER_D3D9EX_KHR: - device = std::auto_ptr(new CD3D9ExWrapper()); - break; - case CL_ADAPTER_DXVA_KHR: - device = std::auto_ptr(new CDXVAWrapper()); - break; -#endif - default: - log_error("DeviceCreate(): Unknown adapter type!\n"); - return false; - break; - } - - return device->Status(); -} - -bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat ) -{ - switch (adapterType) - { -#if defined(_WIN32) - case CL_ADAPTER_D3D9_KHR: - case CL_ADAPTER_D3D9EX_KHR: - case CL_ADAPTER_DXVA_KHR: - { - D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); - LPDIRECT3D9 d3d9 = static_cast(device.D3D()); - D3DDISPLAYMODE d3ddm; - d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm); - - if( FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, 
D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) ) - return false; - } - break; -#endif - default: - log_error("SurfaceFormatCheck(): Unknown adapter type!\n"); - return false; - break; - } - - return true; -} - -bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format) -{ - switch(surfaceFormat) - { - case SURFACE_FORMAT_R32F: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_FLOAT; - break; - case SURFACE_FORMAT_R16F: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_HALF_FLOAT; - break; - case SURFACE_FORMAT_L16: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_UNORM_INT16; - break; - case SURFACE_FORMAT_A8: - format.image_channel_order = CL_A; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_L8: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_G32R32F: - format.image_channel_order = CL_RG; - format.image_channel_data_type = CL_FLOAT; - break; - case SURFACE_FORMAT_G16R16F: - format.image_channel_order = CL_RG; - format.image_channel_data_type = CL_HALF_FLOAT; - break; - case SURFACE_FORMAT_G16R16: - format.image_channel_order = CL_RG; - format.image_channel_data_type = CL_UNORM_INT16; - break; - case SURFACE_FORMAT_A8L8: - format.image_channel_order = CL_RG; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_A32B32G32R32F: - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_FLOAT; - break; - case SURFACE_FORMAT_A16B16G16R16F: - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_HALF_FLOAT; - break; - case SURFACE_FORMAT_A16B16G16R16: - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_UNORM_INT16; - break; - case SURFACE_FORMAT_A8B8G8R8: - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case 
SURFACE_FORMAT_X8B8G8R8: - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_A8R8G8B8: - format.image_channel_order = CL_BGRA; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_X8R8G8B8: - format.image_channel_order = CL_BGRA; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_NV12: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_UNORM_INT8; - break; - case SURFACE_FORMAT_YV12: - format.image_channel_order = CL_R; - format.image_channel_data_type = CL_UNORM_INT8; - break; - default: - log_error("SurfaceFormatToOCL(): Unknown surface format!\n"); - return false; - break; - } - - return true; -} - -void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str ) -{ - switch(surfaceFormat) - { - case SURFACE_FORMAT_R32F: - str = "R32F"; - break; - case SURFACE_FORMAT_R16F: - str = "R16F"; - break; - case SURFACE_FORMAT_L16: - str = "L16"; - break; - case SURFACE_FORMAT_A8: - str = "A8"; - break; - case SURFACE_FORMAT_L8: - str = "L8"; - break; - case SURFACE_FORMAT_G32R32F: - str = "G32R32F"; - break; - case SURFACE_FORMAT_G16R16F: - str = "G16R16F"; - break; - case SURFACE_FORMAT_G16R16: - str = "G16R16"; - break; - case SURFACE_FORMAT_A8L8: - str = "A8L8"; - break; - case SURFACE_FORMAT_A32B32G32R32F: - str = "A32B32G32R32F"; - break; - case SURFACE_FORMAT_A16B16G16R16F: - str = "A16B16G16R16F"; - break; - case SURFACE_FORMAT_A16B16G16R16: - str = "A16B16G16R16"; - break; - case SURFACE_FORMAT_A8B8G8R8: - str = "A8B8G8R8"; - break; - case SURFACE_FORMAT_X8B8G8R8: - str = "X8B8G8R8"; - break; - case SURFACE_FORMAT_A8R8G8B8: - str = "A8R8G8B8"; - break; - case SURFACE_FORMAT_X8R8G8B8: - str = "X8R8G8B8"; - break; - case SURFACE_FORMAT_NV12: - str = "NV12"; - break; - case SURFACE_FORMAT_YV12: - str = "YV12"; - break; - default: - log_error("SurfaceFormatToString(): unknown surface format!\n"); - str = 
"unknown"; - break; - } -} - -bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat, - CDeviceWrapper &device, std::auto_ptr &surface, bool sharedHandle, void **objectSharedHandle) -{ - switch (adapterType) - { -#if defined(_WIN32) - case CL_ADAPTER_D3D9_KHR: - { - surface = std::auto_ptr(new CD3D9SurfaceWrapper); - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - HRESULT hr = 0; - D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); - LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device(); - hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), - sharedHandle ? objectSharedHandle: 0); - - if ( FAILED(hr)) - { - log_error("CreateOffscreenPlainSurface failed\n"); - return false; - } - } - break; - case CL_ADAPTER_D3D9EX_KHR: - { - surface = std::auto_ptr(new CD3D9SurfaceWrapper); - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - HRESULT hr = 0; - D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); - LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device(); - hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), - sharedHandle ? objectSharedHandle: 0); - - if ( FAILED(hr)) - { - log_error("CreateOffscreenPlainSurface failed\n"); - return false; - } - } - break; - case CL_ADAPTER_DXVA_KHR: - { - surface = std::auto_ptr(new CD3D9SurfaceWrapper); - CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); - HRESULT hr = 0; - D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); - IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device(); - hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0, - DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface), sharedHandle ? 
objectSharedHandle: 0); - - if ( FAILED(hr)) - { - log_error("CreateVideoSurface failed\n"); - return false; - } - } - break; -#endif - default: - log_error("MediaSurfaceCreate(): Unknown adapter type!\n"); - return false; - break; - } - - return true; -} - -cl_int deviceExistForCLTest(cl_platform_id platform, - cl_dx9_media_adapter_type_khr media_adapters_type, - void *media_adapters, - CResult &result, - TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/ - ) -{ - cl_int _error; - cl_uint devicesAllNum = 0; - std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; - std::string adapterStr; - AdapterToString(media_adapters_type, adapterStr); - - _error = clGetDeviceIDsFromDX9MediaAdapterKHR(platform, 1, - &media_adapters_type, &media_adapters, CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); - - if (_error != CL_SUCCESS) - { - if(_error != CL_DEVICE_NOT_FOUND) - { - log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(_error)); - result.ResultSub(CResult::TEST_ERROR); - } - else - { - log_info("Skipping test case, device type is not supported by a device (adapter type: %s, shared handle: %s)\n", adapterStr.c_str(), sharedHandleStr.c_str()); - result.ResultSub(CResult::TEST_NOTSUPPORTED); - } - } - - return _error; -} diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h deleted file mode 100644 index f98090ca83..0000000000 --- a/test_extensions/media_sharing/utils.h +++ /dev/null @@ -1,167 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef __UTILS_KHR_MEDIA_H -#define __UTILS_KHR_MEDIA_H - -#include -#include -#include -#include -#include "wrappers.h" -#include "CL/cl_dx9_media_sharing.h" - -#include "harness/typeWrappers.h" - - - - - -extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR; -extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; -extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR; -extern clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR; - -extern cl_platform_id gPlatformIDdetected; -extern cl_device_id gDeviceIDdetected; -extern cl_device_type gDeviceTypeSelected; - -#define NL "\n" -#define TEST_NOT_IMPLEMENTED -1 -#define TEST_NOT_SUPPORTED -2 - -enum TSurfaceFormat -{ - SURFACE_FORMAT_NV12, - SURFACE_FORMAT_YV12, - SURFACE_FORMAT_R32F, - SURFACE_FORMAT_R16F, - SURFACE_FORMAT_L16, - SURFACE_FORMAT_A8, - SURFACE_FORMAT_L8, - SURFACE_FORMAT_G32R32F, - SURFACE_FORMAT_G16R16F, - SURFACE_FORMAT_G16R16, - SURFACE_FORMAT_A8L8, - SURFACE_FORMAT_A32B32G32R32F, - SURFACE_FORMAT_A16B16G16R16F, - SURFACE_FORMAT_A16B16G16R16, - SURFACE_FORMAT_A8B8G8R8, - SURFACE_FORMAT_X8B8G8R8, - SURFACE_FORMAT_A8R8G8B8, - SURFACE_FORMAT_X8R8G8B8, -}; - -enum TContextFuncType -{ - CONTEXT_CREATE_DEFAULT, - CONTEXT_CREATE_FROM_TYPE, -}; - -enum TSharedHandleType -{ - SHARED_HANDLE_ENABLED, - SHARED_HANDLE_DISABLED, -}; - -class CResult { -public: - enum TTestResult { - TEST_NORESULT, - TEST_NOTSUPPORTED, - TEST_PASS, - TEST_FAIL, - TEST_ERROR, - }; - - CResult(); - ~CResult(); - 
- void ResultSub(TTestResult result); - TTestResult ResultLast() const; - int Result() const; - -private: - TTestResult _result; - TTestResult _resultLast; -}; - -void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction); -void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter); -cl_context_info AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType); - -//YUV utils -void YUVGenerateNV12(std::vector &yuv, unsigned int width, unsigned int height, - cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); -void YUVGenerateYV12(std::vector &yuv, unsigned int width, unsigned int height, - cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); -bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector &yuv, unsigned int width, unsigned int height, - cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); -bool YUVSurfaceSetNV12(std::auto_ptr &surface, const std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVSurfaceSetYV12(std::auto_ptr &surface, const std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, const std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVSurfaceGetNV12(std::auto_ptr &surface, std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVSurfaceGetYV12(std::auto_ptr &surface, std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, std::vector &yuv, - unsigned int width, unsigned int height); -bool YUVCompareNV12(const std::vector &yuvTest, const std::vector &yuvRef, - unsigned int width, unsigned int height); -bool YUVCompareYV12(const std::vector &yuvTest, const std::vector &yuvRef, - unsigned int width, unsigned int height); -bool YUVCompare(TSurfaceFormat surfaceFormat, const std::vector &yuvTest, const std::vector &yuvRef, - 
unsigned int width, unsigned int height); - -//other types utils -void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); -void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); -void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, - unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); -bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int channelNum); -bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int channelNum); -bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, - unsigned int width, unsigned int height, unsigned int channelNum); - -bool GetImageInfo(cl_mem object, cl_image_format formatExp, size_t elementSizeExp, - size_t rowPitchExp, size_t slicePitchExp, size_t widthExp, - size_t heightExp, size_t depthExp, unsigned int planeExp); -bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &surface, void *shareHandleExp); -bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, const std::vector &memObjList, unsigned int width, unsigned int height, - std::auto_ptr &surface, void *sharedHandle); -bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck); -unsigned int ChannelNum(TSurfaceFormat surfaceFormat); -unsigned int 
PlanesNum(TSurfaceFormat surfaceFormat); - -#if defined(_WIN32) -D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat); -#endif - -bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &device); -bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat); -bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format); -void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str ); -bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat, - CDeviceWrapper &device, std::auto_ptr &surface, bool sharedHandle, void **objectSharedHandle); - -cl_int deviceExistForCLTest(cl_platform_id platform,cl_dx9_media_adapter_type_khr media_adapters_type,void *media_adapters,CResult &result,TSharedHandleType sharedHandle=SHARED_HANDLE_DISABLED); -#endif // __UTILS_KHR_MEDIA_H diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp deleted file mode 100644 index e7eb5b2bfd..0000000000 --- a/test_extensions/media_sharing/wrappers.cpp +++ /dev/null @@ -1,562 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "wrappers.h" -#include "harness/errorHelpers.h" - -LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" ); -const int CDeviceWrapper::WINDOW_WIDTH = 256; -const int CDeviceWrapper::WINDOW_HEIGHT = 256; -CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType = CDeviceWrapper::ACCELERATION_HW; - -#if defined(_WIN32) -const D3DFORMAT CDXVAWrapper::RENDER_TARGET_FORMAT = D3DFMT_X8R8G8B8; -const D3DFORMAT CDXVAWrapper::VIDEO_FORMAT = D3DFMT_X8R8G8B8; -const unsigned int CDXVAWrapper::VIDEO_FPS = 60; -#endif - -#if defined(_WIN32) -static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) -{ - switch(msg) - { - case WM_DESTROY: - PostQuitMessage(0); - return 0; - case WM_PAINT: - ValidateRect(hWnd, 0); - return 0; - default: - break; - } - - return DefWindowProc(hWnd, msg, wParam, lParam); -} -#endif - -CDeviceWrapper::CDeviceWrapper() -#if defined(_WIN32) -:_hInstance(NULL),_hWnd(NULL) -#endif -{ - -} - -void CDeviceWrapper::WindowInit() -{ -#if defined(_WIN32) - _hInstance = GetModuleHandle(NULL); - static WNDCLASSEX wc = - { - sizeof(WNDCLASSEX), - CS_CLASSDC, - WndProc, - 0L, - 0L, - _hInstance, - NULL, - NULL, - NULL, - NULL, - WINDOW_TITLE, - NULL - }; - - RegisterClassEx(&wc); - - _hWnd = CreateWindow( - WINDOW_TITLE, - WINDOW_TITLE, - WS_OVERLAPPEDWINDOW, - 0, 0, - WINDOW_WIDTH, WINDOW_HEIGHT, - NULL, - NULL, - wc.hInstance, - NULL); - - if (!_hWnd) - { - log_error("Failed to create window"); - return; - } - - ShowWindow(_hWnd,SW_SHOWDEFAULT); - UpdateWindow(_hWnd); -#endif -} - -void CDeviceWrapper::WindowDestroy() -{ -#if defined(_WIN32) - if (_hWnd) - DestroyWindow(_hWnd); - _hWnd = NULL; -#endif -} - -#if defined(_WIN32) -HWND CDeviceWrapper::WindowHandle() const -{ - return _hWnd; -} -#endif - -int CDeviceWrapper::WindowWidth() const -{ - return WINDOW_WIDTH; -} - -int CDeviceWrapper::WindowHeight() const -{ - return WINDOW_HEIGHT; -} - -CDeviceWrapper::TAccelerationType 
CDeviceWrapper::AccelerationType() -{ - return accelerationType; -} - -void CDeviceWrapper::AccelerationType( TAccelerationType accelerationTypeNew ) -{ - accelerationType = accelerationTypeNew; -} - -CDeviceWrapper::~CDeviceWrapper() -{ - WindowDestroy(); -} - -#if defined(_WIN32) -CD3D9Wrapper::CD3D9Wrapper(): -_d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false) -{ - WindowInit(); - - _d3d9 = Direct3DCreate9(D3D_SDK_VERSION); - if (!_d3d9) - { - log_error("Direct3DCreate9 failed\n"); - _status = DEVICE_FAIL; - } -} - -CD3D9Wrapper::~CD3D9Wrapper() -{ - Destroy(); - - if(_d3d9) - _d3d9->Release(); - _d3d9 = 0; -} - -void CD3D9Wrapper::Destroy() -{ - if (_d3dDevice) - _d3dDevice->Release(); - _d3dDevice = 0; -} - -cl_int CD3D9Wrapper::Init() -{ - if (!WindowHandle()) - { - log_error("D3D9: Window is not created\n"); - _status = DEVICE_FAIL; - return DEVICE_FAIL; - } - - if(!_d3d9 || DEVICE_PASS != _status || !_adapterFound) - return false; - - _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm); - - D3DPRESENT_PARAMETERS d3dParams; - ZeroMemory(&d3dParams, sizeof(d3dParams)); - - d3dParams.Windowed = TRUE; - d3dParams.BackBufferCount = 1; - d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD; - d3dParams.hDeviceWindow = WindowHandle(); - d3dParams.BackBufferWidth = WindowWidth(); - d3dParams.BackBufferHeight = WindowHeight(); - d3dParams.BackBufferFormat = _d3ddm.Format; - - DWORD processingType = (AccelerationType() == ACCELERATION_HW)? 
D3DCREATE_HARDWARE_VERTEXPROCESSING: - D3DCREATE_SOFTWARE_VERTEXPROCESSING; - - if ( FAILED( _d3d9->CreateDevice( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(), - processingType, &d3dParams, &_d3dDevice) ) ) - { - log_error("CreateDevice failed\n"); - _status = DEVICE_FAIL; - return DEVICE_FAIL; - } - - _d3dDevice->BeginScene(); - _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); - _d3dDevice->EndScene(); - - return true; -} - -void * CD3D9Wrapper::D3D() const -{ - return _d3d9; -} - -void *CD3D9Wrapper::Device() const -{ - return _d3dDevice; -} - -D3DFORMAT CD3D9Wrapper::Format() -{ - return _d3ddm.Format; -} - -D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() -{ - return _adapter; -} - -TDeviceStatus CD3D9Wrapper::Status() const -{ - return _status; -} - -bool CD3D9Wrapper::AdapterNext() -{ - if (DEVICE_PASS != _status) - return false; - - _adapterFound = false; - for(; _adapterIdx < _d3d9->GetAdapterCount();) - { - ++_adapterIdx; - D3DCAPS9 caps; - if (FAILED(_d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) - continue; - - if(FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) - { - log_error("D3D9: GetAdapterIdentifier failed\n"); - _status = DEVICE_FAIL; - return false; - } - - _adapterFound = true; - - Destroy(); - if(!Init()) - { - _status = DEVICE_FAIL; - _adapterFound = false; - } - break; - } - - return _adapterFound; -} - -unsigned int CD3D9Wrapper::AdapterIdx() const -{ - return _adapterIdx - 1; -} - - -CD3D9ExWrapper::CD3D9ExWrapper(): -_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false) -{ - WindowInit(); - - HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex); - if (FAILED(result) || !_d3d9Ex) - { - log_error("Direct3DCreate9Ex failed\n"); - _status = DEVICE_FAIL; - } -} - -CD3D9ExWrapper::~CD3D9ExWrapper() -{ - Destroy(); - - if(_d3d9Ex) - _d3d9Ex->Release(); - _d3d9Ex = 0; -} - -void * CD3D9ExWrapper::D3D() const -{ - return _d3d9Ex; -} - -void 
*CD3D9ExWrapper::Device() const -{ - return _d3dDeviceEx; -} - -D3DFORMAT CD3D9ExWrapper::Format() -{ - return _d3ddmEx.Format; -} - -D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() -{ - return _adapter; -} - -cl_int CD3D9ExWrapper::Init() -{ - if (!WindowHandle()) - { - log_error("D3D9EX: Window is not created\n"); - _status = DEVICE_FAIL; - return DEVICE_FAIL; - } - - if(!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound) - return DEVICE_FAIL; - - RECT rect; - GetClientRect(WindowHandle(),&rect); - - D3DPRESENT_PARAMETERS d3dParams; - ZeroMemory(&d3dParams, sizeof(d3dParams)); - - d3dParams.Windowed = TRUE; - d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP; - d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8; - d3dParams.BackBufferWidth = WindowWidth(); - d3dParams.BackBufferHeight = WindowHeight(); - - d3dParams.BackBufferCount = 1; - d3dParams.hDeviceWindow = WindowHandle(); - - DWORD processingType = (AccelerationType() == ACCELERATION_HW)? D3DCREATE_HARDWARE_VERTEXPROCESSING: - D3DCREATE_SOFTWARE_VERTEXPROCESSING; - - if ( FAILED( _d3d9Ex->CreateDeviceEx( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(), - processingType, &d3dParams, NULL, &_d3dDeviceEx) ) ) - { - log_error("CreateDeviceEx failed\n"); - _status = DEVICE_FAIL; - return DEVICE_FAIL; - } - - _d3dDeviceEx->BeginScene(); - _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); - _d3dDeviceEx->EndScene(); - - return DEVICE_PASS; -} - -void CD3D9ExWrapper::Destroy() -{ - if (_d3dDeviceEx) - _d3dDeviceEx->Release(); - _d3dDeviceEx = 0; -} - -TDeviceStatus CD3D9ExWrapper::Status() const -{ - return _status; -} - -bool CD3D9ExWrapper::AdapterNext() -{ - if (DEVICE_FAIL == _status) - return false; - - _adapterFound = false; - for(; _adapterIdx < _d3d9Ex->GetAdapterCount();) - { - ++_adapterIdx; - D3DCAPS9 caps; - if (FAILED(_d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) - continue; - - if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) - { - log_error("D3D9EX: 
GetAdapterIdentifier failed\n"); - _status = DEVICE_FAIL; - return false; - } - - _adapterFound = true; - Destroy(); - if(!Init()) - { - _status = DEVICE_FAIL; - _adapterFound = _status; - } - - break; - } - - return _adapterFound; -} - -unsigned int CD3D9ExWrapper::AdapterIdx() const -{ - return _adapterIdx - 1; -} - -CDXVAWrapper::CDXVAWrapper(): -_dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false) -{ - _status = _d3d9.Status(); -} - -CDXVAWrapper::~CDXVAWrapper() -{ - DXVAHDDestroy(); -} - -void * CDXVAWrapper::Device() const -{ - return _dxvaDevice; -} - -TDeviceStatus CDXVAWrapper::Status() const -{ - if(_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL) - return DEVICE_FAIL; - else if(_status == DEVICE_NOTSUPPORTED || _d3d9.Status() == DEVICE_NOTSUPPORTED) - return DEVICE_NOTSUPPORTED; - else - return DEVICE_PASS; -} - -bool CDXVAWrapper::AdapterNext() -{ - if (DEVICE_PASS != _status) - return false; - - _adapterFound = _d3d9.AdapterNext(); - _status = _d3d9.Status(); - if (DEVICE_PASS != _status) - { - _adapterFound = false; - return false; - } - - if (!_adapterFound) - return false; - - DXVAHDDestroy(); - _status = DXVAHDInit(); - if (DEVICE_PASS != _status) - { - _adapterFound = false; - return false; - } - - return true; -} - -TDeviceStatus CDXVAWrapper::DXVAHDInit() -{ - if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL) || !_adapterFound) - return DEVICE_FAIL; - - DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 }; - - DXVAHD_CONTENT_DESC desc; - desc.InputFrameFormat= DXVAHD_FRAME_FORMAT_PROGRESSIVE; - desc.InputFrameRate = fps; - desc.InputWidth = WindowWidth(); - desc.InputHeight = WindowHeight(); - desc.OutputFrameRate = fps; - desc.OutputWidth = WindowWidth(); - desc.OutputHeight = WindowHeight(); - -#ifdef USE_SOFTWARE_PLUGIN - _status = DEVICE_FAIL; - return DEVICE_FAIL; -#endif - - HRESULT hr = DXVAHD_CreateDevice(static_cast(_d3d9.Device()), - &desc, DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice); - if(FAILED(hr)) - { 
- if (hr == E_NOINTERFACE) - { - log_error("DXVAHD_CreateDevice skipped due to no supported devices!\n"); - _status = DEVICE_NOTSUPPORTED; - } - else - { - log_error("DXVAHD_CreateDevice failed\n"); - _status = DEVICE_FAIL; - } - } - - return _status; -} - -void CDXVAWrapper::DXVAHDDestroy() -{ - if (_dxvaDevice) - _dxvaDevice->Release(); - _dxvaDevice = 0; -} - -void * CDXVAWrapper::D3D() const -{ - return _d3d9.D3D(); -} - -unsigned int CDXVAWrapper::AdapterIdx() const -{ - return _d3d9.AdapterIdx(); -} - -const CD3D9ExWrapper & CDXVAWrapper::D3D9() const -{ - return _d3d9; -} - -CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(): -mMem(NULL) -{ - -} - -CD3D9SurfaceWrapper::CD3D9SurfaceWrapper( IDirect3DSurface9* mem ): -mMem(mem) -{ - -} - -CD3D9SurfaceWrapper::~CD3D9SurfaceWrapper() -{ - if(mMem != NULL) - mMem->Release(); - mMem = NULL; -} - -#endif - -CSurfaceWrapper::CSurfaceWrapper() -{ - -} - -CSurfaceWrapper::~CSurfaceWrapper() -{ - -} diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h deleted file mode 100644 index 45b70326d1..0000000000 --- a/test_extensions/media_sharing/wrappers.h +++ /dev/null @@ -1,197 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#ifndef __WRAPPERS_H -#define __WRAPPERS_H - -#if defined(_WIN32) -#include -#if defined (__MINGW32__) -#include -typedef unsigned char UINT8; -#define __out -#define __in -#define __inout -#define __out_bcount(size) -#define __out_bcount_opt(size) -#define __in_opt -#define __in_ecount(size) -#define __in_ecount_opt(size) -#define __out_opt -#define __out_ecount(size) -#define __out_ecount_opt(size) -#define __in_bcount_opt(size) -#define __inout_opt -#define __inout_bcount(size) -#define __in_bcount(size) -#define __deref_out -#endif -#include -#include -#endif - -enum TDeviceStatus -{ - DEVICE_NOTSUPPORTED, - DEVICE_PASS, - DEVICE_FAIL, -}; - -class CDeviceWrapper { -public: - enum TAccelerationType - { - ACCELERATION_HW, - ACCELERATION_SW, - }; - - CDeviceWrapper(); - virtual ~CDeviceWrapper(); - - virtual bool AdapterNext() = 0; - virtual unsigned int AdapterIdx() const = 0; - virtual void *Device() const = 0; - virtual TDeviceStatus Status() const = 0; - virtual void *D3D() const = 0; - -#if defined(_WIN32) - HWND WindowHandle() const; -#endif - int WindowWidth() const; - int WindowHeight() const; - void WindowInit(); - - - static TAccelerationType AccelerationType(); - static void AccelerationType(TAccelerationType accelerationTypeNew); - -private: - static LPCTSTR WINDOW_TITLE; - static const int WINDOW_WIDTH; - static const int WINDOW_HEIGHT; - static TAccelerationType accelerationType; - -#if defined(_WIN32) - HMODULE _hInstance; - HWND _hWnd; -#endif - - void WindowDestroy(); -}; - -class CSurfaceWrapper -{ -public: - CSurfaceWrapper(); - virtual ~CSurfaceWrapper(); -}; - -#if defined(_WIN32) -//windows specific wrappers -class CD3D9Wrapper: public CDeviceWrapper { -public: - CD3D9Wrapper(); - ~CD3D9Wrapper(); - - virtual bool AdapterNext(); - virtual unsigned int AdapterIdx() const; - virtual void *Device() const; - virtual TDeviceStatus Status() const; - virtual void *D3D() const; - -private: - LPDIRECT3D9 _d3d9; - LPDIRECT3DDEVICE9 _d3dDevice; - 
D3DDISPLAYMODE _d3ddm; - D3DADAPTER_IDENTIFIER9 _adapter; - TDeviceStatus _status; - unsigned int _adapterIdx; - bool _adapterFound; - - D3DFORMAT Format(); - D3DADAPTER_IDENTIFIER9 Adapter(); - int Init(); - void Destroy(); -}; - -class CD3D9ExWrapper: public CDeviceWrapper { -public: - CD3D9ExWrapper(); - ~CD3D9ExWrapper(); - - virtual bool AdapterNext(); - virtual unsigned int AdapterIdx() const; - virtual void *Device() const; - virtual TDeviceStatus Status() const; - virtual void *D3D() const; - -private: - LPDIRECT3D9EX _d3d9Ex; - LPDIRECT3DDEVICE9EX _d3dDeviceEx; - D3DDISPLAYMODEEX _d3ddmEx; - D3DADAPTER_IDENTIFIER9 _adapter; - TDeviceStatus _status; - unsigned int _adapterIdx; - bool _adapterFound; - - D3DFORMAT Format(); - D3DADAPTER_IDENTIFIER9 Adapter(); - int Init(); - void Destroy(); -}; - -class CDXVAWrapper: public CDeviceWrapper { -public: - CDXVAWrapper(); - ~CDXVAWrapper(); - - virtual bool AdapterNext(); - virtual unsigned int AdapterIdx() const; - virtual void *Device() const; - virtual TDeviceStatus Status() const; - virtual void *D3D() const; - const CD3D9ExWrapper &D3D9() const; - -private: - CD3D9ExWrapper _d3d9; - IDXVAHD_Device *_dxvaDevice; - TDeviceStatus _status; - bool _adapterFound; - - static const D3DFORMAT RENDER_TARGET_FORMAT; - static const D3DFORMAT VIDEO_FORMAT; - static const unsigned int VIDEO_FPS; - - TDeviceStatus DXVAHDInit(); - void DXVAHDDestroy(); -}; - -class CD3D9SurfaceWrapper: public CSurfaceWrapper -{ -public: - CD3D9SurfaceWrapper(); - CD3D9SurfaceWrapper( IDirect3DSurface9* mem ); - ~CD3D9SurfaceWrapper(); - - operator IDirect3DSurface9*() { return mMem; } - IDirect3DSurface9* * operator&() { return &mMem; } - IDirect3DSurface9* operator->() const { return mMem; } - -private: - IDirect3DSurface9* mMem; -}; -#endif - -#endif // __D3D_WRAPPERS