From 88b0a026991aaff0c4cc350e5e84e9fea5653d2e Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 17 Apr 2023 20:36:54 +0200 Subject: [PATCH] Add first support for generic address space This is taking over some work from #994 Fixes #1077 --- include/clspv/FeatureMacro.h | 7 +- include/clspv/Option.h | 3 +- lib/BuiltinsEnum.h | 3 + lib/BuiltinsMap.inc | 3 + lib/CMakeLists.txt | 1 + lib/Compiler.cpp | 7 + lib/FeatureMacro.cpp | 1 - lib/LowerAddrSpaceCastPass.cpp | 287 ++++++++++++++++++ lib/LowerAddrSpaceCastPass.h | 67 ++++ lib/PassRegistry.def | 1 + lib/Passes.h | 1 + lib/ReplaceOpenCLBuiltinPass.cpp | 19 ++ lib/ReplaceOpenCLBuiltinPass.h | 2 + .../generic_volatile_memory_access.cl | 31 ++ test/AddressSpaceCast/incompatible_cast.ll | 72 +++++ test/AddressSpaceCast/issue-1077.ll | 79 +++++ test/AddressSpaceCast/to_global.cl | 18 ++ test/AddressSpaceCast/to_global.ll | 80 +++++ test/AddressSpaceCast/to_local.cl | 18 ++ test/AddressSpaceCast/to_local.ll | 80 +++++ test/AddressSpaceCast/to_private.cl | 19 ++ test/AddressSpaceCast/to_private.ll | 83 +++++ test/AtomicBuiltins/atomic_flag.cl | 2 +- test/AtomicBuiltins/atomic_flag_errors.cl | 2 +- test/AtomicBuiltins/atomic_flag_warnings.cl | 2 +- test/AtomicBuiltins/atomic_init.cl | 2 +- test/AtomicBuiltins/enum_values.cl | 2 +- test/Diagnostics/dse-unsupported-cl3.cl | 2 +- test/Diagnostics/pipes-unsupported-cl3.cl | 2 +- test/Features/cl3-all-features.cl | 2 +- test/Features/cl3-disabled-features.cl | 2 +- test/Features/cl3-no-features.cl | 2 +- test/Features/cl3-some-features.cl | 2 +- test/Features/fp64-default-cl3.cl | 2 +- test/Features/fp64-disabled-cl3.cl | 2 +- test/SubGroup/get_sub_group_max_size.cl | 2 +- .../atomic_work_item_fence.cl | 2 +- test/opsource.cl | 2 +- 38 files changed, 892 insertions(+), 22 deletions(-) create mode 100644 lib/LowerAddrSpaceCastPass.cpp create mode 100644 lib/LowerAddrSpaceCastPass.h create mode 100644 test/AddressSpaceCast/generic_volatile_memory_access.cl create mode 100644 test/AddressSpaceCast/incompatible_cast.ll create mode 100644 test/AddressSpaceCast/issue-1077.ll create mode 100644 test/AddressSpaceCast/to_global.cl create mode 100644 test/AddressSpaceCast/to_global.ll create mode 100644 test/AddressSpaceCast/to_local.cl create mode 100644 test/AddressSpaceCast/to_local.ll create mode 100644 test/AddressSpaceCast/to_private.cl create mode 100644 test/AddressSpaceCast/to_private.ll diff --git a/include/clspv/FeatureMacro.h b/include/clspv/FeatureMacro.h index 2b40f3a9da..5bf33c39b8 100644 --- a/include/clspv/FeatureMacro.h +++ b/include/clspv/FeatureMacro.h @@ -30,7 +30,6 @@ enum class FeatureMacro { __opencl_c_subgroups, // following items are not supported __opencl_c_device_enqueue, - __opencl_c_generic_address_space, __opencl_c_pipes, __opencl_c_program_scope_global_variables, // following items are always enabled, but no point in complaining if they are @@ -44,6 +43,7 @@ enum class FeatureMacro { __opencl_c_read_write_images, __opencl_c_atomic_scope_device, __opencl_c_atomic_scope_all_devices, + __opencl_c_generic_address_space, __opencl_c_work_group_collective_functions }; @@ -53,6 +53,7 @@ constexpr std::array, 15> FeatureStr(__opencl_c_3d_image_writes), FeatureStr(__opencl_c_atomic_order_acq_rel), FeatureStr(__opencl_c_fp64), FeatureStr(__opencl_c_images), + FeatureStr(__opencl_c_generic_address_space), FeatureStr(__opencl_c_subgroups), // following items are always enabled by clang FeatureStr(__opencl_c_int64), @@ -62,9 +63,7 @@ constexpr std::array, 15> FeatureStr(__opencl_c_atomic_scope_all_devices), FeatureStr(__opencl_c_work_group_collective_functions), // following items cannot be enabled so are automatically disabled - FeatureStr(__opencl_c_device_enqueue), - FeatureStr(__opencl_c_generic_address_space), - FeatureStr(__opencl_c_pipes), + FeatureStr(__opencl_c_device_enqueue), FeatureStr(__opencl_c_pipes), FeatureStr(__opencl_c_program_scope_global_variables)}; #undef FeatureStr diff --git a/include/clspv/Option.h b/include/clspv/Option.h index 5b194ed6c6..bcff992330 100644 --- a/include/clspv/Option.h +++ b/include/clspv/Option.h @@ -172,7 +172,8 @@ SourceLanguage Language(); // Returns true when the source language makes use of the generic address space. inline bool LanguageUsesGenericAddressSpace() { return (Language() == SourceLanguage::OpenCL_CPP) || - ((Language() == SourceLanguage::OpenCL_C_20)); + (Language() == SourceLanguage::OpenCL_C_20) || + (Language() == SourceLanguage::OpenCL_C_30); } // Return the SPIR-V binary version diff --git a/lib/BuiltinsEnum.h b/lib/BuiltinsEnum.h index 7c602480f2..8e6f8d83ef 100644 --- a/lib/BuiltinsEnum.h +++ b/lib/BuiltinsEnum.h @@ -110,6 +110,9 @@ enum BuiltinType : unsigned int { kMemFence, kReadMemFence, kWriteMemFence, + kToGlobal, + kToLocal, + kToPrivate, kType_MemoryFence_End, kType_Geometric_Start, diff --git a/lib/BuiltinsMap.inc b/lib/BuiltinsMap.inc index 7905a00664..c2424c7139 100644 --- a/lib/BuiltinsMap.inc +++ b/lib/BuiltinsMap.inc @@ -832,6 +832,9 @@ static std::unordered_map NotSuppported{ FeatureMacro::__opencl_c_pipes, - FeatureMacro::__opencl_c_generic_address_space, FeatureMacro::__opencl_c_device_enqueue, FeatureMacro::__opencl_c_program_scope_global_variables}; diff --git a/lib/LowerAddrSpaceCastPass.cpp b/lib/LowerAddrSpaceCastPass.cpp new file mode 100644 index 0000000000..2e1d4008d0 --- /dev/null +++ b/lib/LowerAddrSpaceCastPass.cpp @@ -0,0 +1,287 @@ +// Copyright 2023 The Clspv Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "LowerAddrSpaceCastPass.h" +#include "BitcastUtils.h" +#include "clspv/AddressSpace.h" +#include "Types.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +#define DEBUG_TYPE "LowerAddrSpaceCast" + +namespace { + +using PartitionCallback = std::function; + +/// Partition the @p Instructions based on their liveness. +void partitionInstructions(ArrayRef Instructions, + PartitionCallback OnDead, + PartitionCallback OnAlive) { + for (auto OldValueHandle : Instructions) { + // Handle situations when the weak handle is no longer valid. + if (!OldValueHandle.pointsToAliveValue()) { + continue; // Nothing else to do for this handle. + } + + auto *OldInstruction = cast(OldValueHandle); + bool Dead = OldInstruction->use_empty(); + if (Dead) { + OnDead(OldInstruction); + } else { + OnAlive(OldInstruction); + } + } +} + +bool isGenericPTy(Type *Ty) { + return Ty && Ty->isPointerTy() && + Ty->getPointerAddressSpace() == clspv::AddressSpace::Generic; +} +} // namespace + +PreservedAnalyses clspv::LowerAddrSpaceCastPass::run(Module &M, + ModuleAnalysisManager &) { + PreservedAnalyses PA; + + for (auto &F : M.functions()) { + BitcastUtils::RemoveCstExprFromFunction(&F); + runOnFunction(F); + } + + return PA; +} + +Value *clspv::LowerAddrSpaceCastPass::visit(Value *V) { + auto it = ValueMap.find(V); + if (it != ValueMap.end()) { + return it->second; + } + auto *I = dyn_cast(V); + if (I == nullptr) { + return V; + } + + if (auto *alloca = dyn_cast(I)) { + if (alloca->getAllocatedType()->isPointerTy() && + alloca->getAllocatedType()->getPointerAddressSpace() != + clspv::AddressSpace::Private) { + return visit(alloca); + } + } + + if (isGenericPTy(I->getType())) { + return visit(I); + } + + for (auto &Operand : I->operands()) { + if (isGenericPTy(Operand->getType())) { + return visit(I); + } + } + + return V; +} + +llvm::Value * +clspv::LowerAddrSpaceCastPass::visitAllocaInst(llvm::AllocaInst &I) { + IRBuilder<> B(&I); + auto alloca = B.CreateAlloca( + PointerType::get(I.getContext(), clspv::AddressSpace::Private), + I.getArraySize(), I.getName()); + registerReplacement(&I, alloca); + return alloca; +} + +llvm::Value *clspv::LowerAddrSpaceCastPass::visitLoadInst(llvm::LoadInst &I) { + IRBuilder<> B(&I); + Type *Ty = I.getType(); + Value *Ptr = visit(I.getPointerOperand()); + if (isGenericPTy(Ty)) { + Ty = clspv::InferType(Ptr, I.getContext(), &TypeCache); + } + auto load = B.CreateLoad(Ty, Ptr, I.getName()); + registerReplacement(&I, load); + if (!isGenericPTy(I.getType())) { + I.replaceAllUsesWith(load); + } + return load; +} + +llvm::Value *clspv::LowerAddrSpaceCastPass::visitStoreInst(llvm::StoreInst &I) { + IRBuilder<> B(&I); + Value *Val = visit(I.getValueOperand()); + Value *Ptr = visit(I.getPointerOperand()); + if (isa(Val)) { + Val = ConstantPointerNull::get(PointerType::get( + I.getContext(), clspv::InferType(Ptr, I.getContext(), &TypeCache) + ->getPointerAddressSpace())); + } + auto store = B.CreateStore(Val, Ptr); + registerReplacement(&I, store); + return store; +} + +llvm::Value *clspv::LowerAddrSpaceCastPass::visitGetElementPtrInst( + llvm::GetElementPtrInst &I) { + IRBuilder<> B(&I); + auto gep = B.CreateGEP(I.getSourceElementType(), visit(I.getPointerOperand()), + SmallVector{I.indices()}, I.getName(), + I.isInBounds()); + registerReplacement(&I, gep); + return gep; +} + +llvm::Value *clspv::LowerAddrSpaceCastPass::visitAddrSpaceCastInst( + llvm::AddrSpaceCastInst &I) { + auto ptr = visit(I.getPointerOperand()); + // Returns a pointer that points to a region in the address space if + // "to_addrspace" can cast ptr to the address space. Otherwise it returns + // NULL. + if (ptr->getType() != I.getSrcTy() && ptr->getType() != I.getDestTy()) { + ptr = ConstantPointerNull::get(cast(I.getType())); + I.replaceAllUsesWith(ptr); + } + registerReplacement(&I, ptr); + return ptr; +} + +llvm::Value *clspv::LowerAddrSpaceCastPass::visitICmpInst(llvm::ICmpInst &I) { + IRBuilder<> B(&I); + Value *Op0 = visit(I.getOperand(0)); + Value *Op1 = visit(I.getOperand(1)); + if (Op0->getType() != Op1->getType()) { + if (isa(Op0)) { + Op0 = ConstantPointerNull::get(cast(Op1->getType())); + } else if (isa(Op1)) { + Op1 = ConstantPointerNull::get(cast(Op0->getType())); + } else { + llvm_unreachable("unsupported operand of icmp in loweraddrspacecast"); + } + } + + auto icmp = B.CreateICmp(I.getPredicate(), Op0, Op1, I.getName()); + registerReplacement(&I, icmp); + I.replaceAllUsesWith(icmp); + return icmp; +} + +Value *clspv::LowerAddrSpaceCastPass::visitInstruction(Instruction &I) { +#ifndef NDEBUG + dbgs() << "Instruction not handled: " << I << '\n'; +#endif + llvm_unreachable("Missing support for instruction"); +} + +void clspv::LowerAddrSpaceCastPass::registerReplacement(Value *U, Value *V) { + LLVM_DEBUG(dbgs() << "Replacement for " << *U << ": " << *V << '\n'); + assert(ValueMap.count(U) == 0 && "Value already registered"); + ValueMap.insert({U, V}); +} + +void clspv::LowerAddrSpaceCastPass::runOnFunction(Function &F) { + LLVM_DEBUG(dbgs() << "Processing " << F.getName() << '\n'); + + // Skip declarations. + if (F.isDeclaration()) { + return; + } + for (Instruction &I : instructions(&F)) { + // Use the Value overload of visit to ensure cache is used. + visit(static_cast(&I)); + } + + cleanDeadInstructions(); + + LLVM_DEBUG(dbgs() << "Final version for " << F.getName() << '\n'); + LLVM_DEBUG(dbgs() << F << '\n'); +} + +void clspv::LowerAddrSpaceCastPass::cleanDeadInstructions() { + // Collect all instructions that have been replaced by another one, and remove + // them from the function. To address dependencies, use a fixed-point + // algorithm: + // 1. Collect the instructions that have been replaced. + // 2. Collect among these instructions the ones which have no uses and remove + // them. + // 3. Repeat step 2 until no progress is made. + + // Select instructions that were replaced by another one. + // Ignore constants as they are not owned by the module and therefore don't + // need to be removed. + using WeakInstructions = SmallVector; + WeakInstructions OldInstructions; + for (const auto &Mapping : ValueMap) { + if (Mapping.getSecond() != nullptr) { + if (auto *OldInstruction = dyn_cast(Mapping.getFirst())) { + OldInstructions.push_back(OldInstruction); + } else { + assert(isa(Mapping.getFirst()) && + "Only Instruction and Constant are expected in ValueMap"); + } + } + } + + // Erase any mapping, as they won't be valid anymore. + ValueMap.clear(); + + for (bool Progress = true; Progress;) { + std::size_t PreviousSize = OldInstructions.size(); + + // Identify instructions that are actually dead and can be removed using + // RecursivelyDeleteTriviallyDeadInstructions. + // Use a third buffer to capture the instructions that are still alive to + // avoid mutating OldInstructions while iterating over it. + WeakInstructions NextBatch; + WeakInstructions TriviallyDeads; + partitionInstructions( + OldInstructions, + [&TriviallyDeads](Instruction *DeadInstruction) { + // Additionally, manually remove from the parent instructions with + // possible side-effect, generally speaking, such as call or alloca + // instructions. Those are not trivially dead. + if (isInstructionTriviallyDead(DeadInstruction)) { + TriviallyDeads.push_back(DeadInstruction); + } else { + DeadInstruction->eraseFromParent(); + } + }, + [&NextBatch](Instruction *AliveInstruction) { + NextBatch.push_back(AliveInstruction); + }); + + RecursivelyDeleteTriviallyDeadInstructions(TriviallyDeads); + + // Update OldInstructions for the next iteration of the fixed-point. + OldInstructions = std::move(NextBatch); + Progress = (OldInstructions.size() < PreviousSize); + } + +#ifndef NDEBUG + if (!OldInstructions.empty()) { + dbgs() << "These values were expected to be removed:\n"; + for (auto ValueHandle : OldInstructions) { + dbgs() << '\t' << *ValueHandle << '\n'; + } + llvm_unreachable("Not all supposedly-dead instruction were removed!"); + } +#endif +} diff --git a/lib/LowerAddrSpaceCastPass.h b/lib/LowerAddrSpaceCastPass.h new file mode 100644 index 0000000000..a75d4f6466 --- /dev/null +++ b/lib/LowerAddrSpaceCastPass.h @@ -0,0 +1,67 @@ +// Copyright 2023 The Clspv Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef _CLSPV_LIB_ADDRSPACECAST_PASS_H +#define _CLSPV_LIB_ADDRSPACECAST_PASS_H + +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +#include "clspv/AddressSpace.h" + +namespace clspv { +struct LowerAddrSpaceCastPass + : llvm::PassInfoMixin, + llvm::InstVisitor { + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); + +private: + // Implementation details for InstVisitor + using Visitor = llvm::InstVisitor; + using Visitor::visit; + friend Visitor; + + llvm::Value *visit(llvm::Value *V); + llvm::Value *visitAllocaInst(llvm::AllocaInst &I); + llvm::Value *visitLoadInst(llvm::LoadInst &I); + llvm::Value *visitStoreInst(llvm::StoreInst &I); + llvm::Value *visitGetElementPtrInst(llvm::GetElementPtrInst &I); + llvm::Value *visitAddrSpaceCastInst(llvm::AddrSpaceCastInst &I); + llvm::Value *visitICmpInst(llvm::ICmpInst &I); + llvm::Value *visitInstruction(llvm::Instruction &I); + + void runOnFunction(llvm::Function &F); + + void registerReplacement(llvm::Value *U, llvm::Value *V); + + /// Clears the dead instructions and others that might be rendered dead + /// by their removal. + void cleanDeadInstructions(); + + /// A map between original values and their replacement. + /// + /// The content of this mapping is valid only for the function being visited + /// at a given time. The keys in this mapping should be removed from the + /// function once all instructions in the current function have been visited + /// and transformed. Instructions are not removed from the function as they + /// are visited because this would invalidate iterators. + llvm::DenseMap ValueMap; + + llvm::DenseMap TypeCache; +}; +} // namespace clspv + +#endif // _CLSPV_LIB_ADDRSPACECAST_PASS_H diff --git a/lib/PassRegistry.def b/lib/PassRegistry.def index 996b581594..8bbf77dcab 100644 --- a/lib/PassRegistry.def +++ b/lib/PassRegistry.def @@ -36,6 +36,7 @@ MODULE_PASS("inline-func-with-single-call-site", clspv::InlineFuncWithSingleCall MODULE_PASS("logical-pointer-to-int", clspv::LogicalPointerToIntPass) MODULE_PASS("long-vector-lowering", clspv::LongVectorLoweringPass) MODULE_PASS("set-image-channel-metadata", clspv::SetImageChannelMetadataPass) +MODULE_PASS("lower-addrspacecast", clspv::LowerAddrSpaceCastPass) MODULE_PASS("multi-version-ubo-functions", clspv::MultiVersionUBOFunctionsPass) MODULE_PASS("native-math", clspv::NativeMathPass) MODULE_PASS("opencl-inliner", clspv::OpenCLInlinerPass) diff --git a/lib/Passes.h b/lib/Passes.h index 89ceaca915..3da3d6a9ab 100644 --- a/lib/Passes.h +++ b/lib/Passes.h @@ -34,6 +34,7 @@ #include "InlineFuncWithSingleCallSitePass.h" #include "LogicalPointerToIntPass.h" #include "LongVectorLoweringPass.h" +#include "LowerAddrSpaceCastPass.h" #include "MultiVersionUBOFunctionsPass.h" #include "NativeMathPass.h" #include "OpenCLInlinerPass.h" diff --git a/lib/ReplaceOpenCLBuiltinPass.cpp b/lib/ReplaceOpenCLBuiltinPass.cpp index 295e2a1b9d..c682be204d 100644 --- a/lib/ReplaceOpenCLBuiltinPass.cpp +++ b/lib/ReplaceOpenCLBuiltinPass.cpp @@ -79,6 +79,9 @@ std::set ReplaceOpenCLBuiltinPass::ReplaceableBuiltins = Builtins::kMemFence, Builtins::kReadMemFence, Builtins::kWriteMemFence, + Builtins::kToGlobal, + Builtins::kToLocal, + Builtins::kToPrivate, Builtins::kIsequal, Builtins::kIsgreater, Builtins::kIsgreaterequal, @@ -528,6 +531,12 @@ bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) { return replaceAtomicLoad(F); case Builtins::kGetFence: return replaceGetFence(F); + case Builtins::kToGlobal: + return replaceAddressSpaceQualifiers(F, AddressSpace::Global); + case Builtins::kToLocal: + return replaceAddressSpaceQualifiers(F, AddressSpace::Local); + case Builtins::kToPrivate: + return replaceAddressSpaceQualifiers(F, AddressSpace::Private); case Builtins::kAtomicInit: case Builtins::kAtomicStore: case Builtins::kAtomicStoreExplicit: @@ -3572,6 +3581,16 @@ bool ReplaceOpenCLBuiltinPass::replaceGetFence(Function &F) { }); } +bool ReplaceOpenCLBuiltinPass::replaceAddressSpaceQualifiers( + Function &F, AddressSpace::Type addrspace) { + return replaceCallsWithValue(F, [&F, addrspace](CallInst *Call) { + auto ptr = Call->getArgOperand(0); + IRBuilder<> builder(Call); + return builder.CreateAddrSpaceCast( + ptr, PointerType::get(F.getContext(), addrspace)); + }); +} + bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics( Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) { return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) { diff --git a/lib/ReplaceOpenCLBuiltinPass.h b/lib/ReplaceOpenCLBuiltinPass.h index 3253b2dc77..cd3632f663 100644 --- a/lib/ReplaceOpenCLBuiltinPass.h +++ b/lib/ReplaceOpenCLBuiltinPass.h @@ -91,6 +91,8 @@ struct ReplaceOpenCLBuiltinPass bool replaceAtomics(llvm::Function &F, llvm::AtomicRMWInst::BinOp Op); bool replaceAtomicLoad(llvm::Function &F); bool replaceGetFence(llvm::Function &F); + bool replaceAddressSpaceQualifiers(llvm::Function &F, + clspv::AddressSpace::Type addrspace); bool replaceExplicitAtomics(llvm::Function &F, spv::Op Op, spv::MemorySemanticsMask semantics = spv::MemorySemanticsAcquireReleaseMask); diff --git a/test/AddressSpaceCast/generic_volatile_memory_access.cl b/test/AddressSpaceCast/generic_volatile_memory_access.cl new file mode 100644 index 0000000000..f3b8b41add --- /dev/null +++ b/test/AddressSpaceCast/generic_volatile_memory_access.cl @@ -0,0 +1,31 @@ +// RUN: clspv %target %s -o %t.spv -cl-std=CL2.0 -inline-entry-points +// RUN: spirv-val --target-env vulkan1.0 %t.spv + +bool isFenceValid(cl_mem_fence_flags fence) { + if ((fence == 0) || (fence == CLK_GLOBAL_MEM_FENCE) || + (fence == CLK_LOCAL_MEM_FENCE) || + (fence == (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))) + return true; + else + return false; +} + +bool helperFunction(float *floatp, float val) { + if (!isFenceValid(get_fence(floatp))) + return false; + + if (*floatp != val) + return false; + + return true; +} + +__kernel void testKernel(__global uint *results) { + uint tid = get_global_id(0); + + __private float val; + val = 0.1f; + float *volatile ptr = &val; + + results[tid] = helperFunction(ptr, val); +} diff --git a/test/AddressSpaceCast/incompatible_cast.ll b/test/AddressSpaceCast/incompatible_cast.ll new file mode 100644 index 0000000000..a815003316 --- /dev/null +++ b/test/AddressSpaceCast/incompatible_cast.ll @@ -0,0 +1,72 @@ +; RUN: clspv-opt --passes=lower-addrspacecast %s -o %t.ll +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: addrspacecast +; CHECK: getelementptr inbounds float, ptr addrspace(3) null +; CHECK-NOT: addrspacecast + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { <3 x i32> } + +@__push_constants = addrspace(9) global %0 zeroinitializer, !push_constants !0 +@__spirv_GlobalInvocationId = addrspace(5) global <3 x i32> zeroinitializer +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_kernel void @k(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) #0 !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 !clspv.pod_args_impl !11 { +entry: + %0 = load i32, ptr addrspace(5) @__spirv_GlobalInvocationId, align 16 + %1 = load i32, ptr addrspace(9) @__push_constants, align 16 + %2 = call i32 @clspv.wrap_constant_load.0(i32 %1) #2 + %3 = add i32 %0, %2 + %4 = addrspacecast ptr addrspace(1) %in to ptr addrspace(4) + br label %for.cond.i + +for.cond.i: ; preds = %for.body.i, %entry + %j.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] + %res.0.i = phi float [ 0.000000e+00, %entry ], [ %add.i, %for.body.i ] + %cmp.i = icmp ult i32 %j.0.i, %3 + br i1 %cmp.i, label %for.body.i, label %_Z4loopPU3AS4Kfj.exit + +for.body.i: ; preds = %for.cond.i + %5 = addrspacecast ptr addrspace(4) %4 to ptr addrspace(3) + %arrayidx.i = getelementptr inbounds float, ptr addrspace(3) %5, i32 %j.0.i + %6 = load float, ptr addrspace(3) %arrayidx.i, align 4 + %add.i = fadd float %res.0.i, %6 + %inc.i = add i32 %j.0.i, 1 + br label %for.cond.i, !llvm.loop !12 + +_Z4loopPU3AS4Kfj.exit: ; preds = %for.cond.i + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i32 %3 + store float %res.0.i, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +; Function Attrs: memory(read) +declare i32 @clspv.wrap_constant_load.0(i32) #1 + +attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #1 = { memory(read) } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} + +!0 = !{i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 2, i32 0} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project be3764fecc263f7180bfada7ac61c5f8d799610e)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 22b564c64b736f5a422b3967720c871c8f9eee9b)"} +!7 = !{i32 1, i32 1} +!8 = !{!"none", !"none"} +!9 = !{!"float*", !"float*"} +!10 = !{!"", !""} +!11 = !{i32 3} +!12 = distinct !{!12, !13} +!13 = !{!"llvm.loop.mustprogress"} diff --git a/test/AddressSpaceCast/issue-1077.ll b/test/AddressSpaceCast/issue-1077.ll new file mode 100644 index 0000000000..d9e2ac002a --- /dev/null +++ b/test/AddressSpaceCast/issue-1077.ll @@ -0,0 +1,79 @@ +; RUN: clspv-opt --passes=lower-addrspacecast %s -o %t.ll +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: addrspacecast +; CHECK: icmp eq ptr addrspace(1) %in, null +; CHECK: [[gep:%[^ ]+]] = getelementptr inbounds float, ptr addrspace(1) %in +; CHECK: load float, ptr addrspace(1) [[gep]] + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { <3 x i32> } + +@__push_constants = addrspace(9) global %0 zeroinitializer, !push_constants !0 +@__spirv_GlobalInvocationId = addrspace(5) global <3 x i32> zeroinitializer +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_kernel void @k(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) #0 !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 !clspv.pod_args_impl !11 { +entry: + %0 = load i32, ptr addrspace(5) @__spirv_GlobalInvocationId, align 16 + %1 = load i32, ptr addrspace(9) @__push_constants, align 16 + %2 = call i32 @clspv.wrap_constant_load.0(i32 %1) #2 + %3 = add i32 %0, %2 + %4 = addrspacecast ptr addrspace(1) %in to ptr addrspace(4) + br label %for.cond.i + +for.cond.i: ; preds = %if.end.i, %entry + %j.0.i = phi i32 [ 0, %entry ], [ %inc.i, %if.end.i ] + %res.0.i = phi float [ 0.000000e+00, %entry ], [ %add.i, %if.end.i ] + %cmp.i = icmp ult i32 %j.0.i, %3 + br i1 %cmp.i, label %for.body.i, label %_Z4loopPU3AS4Kfj.exit + +for.body.i: ; preds = %for.cond.i + %tobool.not.i = icmp eq ptr addrspace(4) %4, null + br i1 %tobool.not.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %for.body.i + br label %_Z4loopPU3AS4Kfj.exit + +if.end.i: ; preds = %for.body.i + %arrayidx.i = getelementptr inbounds float, ptr addrspace(4) %4, i32 %j.0.i + %5 = load float, ptr addrspace(4) %arrayidx.i, align 4 + %add.i = fadd float %res.0.i, %5 + %inc.i = add i32 %j.0.i, 1 + br label %for.cond.i, !llvm.loop !12 + +_Z4loopPU3AS4Kfj.exit: ; preds = %for.cond.i, %if.then.i + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i32 %3 + store float %res.0.i, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +; Function Attrs: memory(read) +declare i32 @clspv.wrap_constant_load.0(i32) #1 + +attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #1 = { memory(read) } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} + +!0 = !{i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 2, i32 0} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project be3764fecc263f7180bfada7ac61c5f8d799610e)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 22b564c64b736f5a422b3967720c871c8f9eee9b)"} +!7 = !{i32 1, i32 1} +!8 = !{!"none", !"none"} +!9 = !{!"float*", !"float*"} +!10 = !{!"", !""} +!11 = !{i32 3} +!12 = distinct !{!12, !13} +!13 = !{!"llvm.loop.mustprogress"} diff --git a/test/AddressSpaceCast/to_global.cl b/test/AddressSpaceCast/to_global.cl new file mode 100644 index 0000000000..92c3ef5552 --- /dev/null +++ b/test/AddressSpaceCast/to_global.cl @@ -0,0 +1,18 @@ +// RUN: clspv --cl-std=CLC++ --inline-entry-points %s -o %t.spv --show-producer-ir &> %t.ll +// RUN: spirv-val --target-env spv1.0 %t.spv +// RUN: FileCheck %s < %t.ll + +// CHECK-NOT: addrspacecast + +float loop(const float *data, unsigned num) { + float res = 0; + for (unsigned j = 0; j < num; ++j) { + res += to_local(data)[j]; + } + return res; +} + +kernel void k(global float* in, global float* out) { + unsigned index = get_global_id(0); + out[index] = loop(in, index); +} diff --git a/test/AddressSpaceCast/to_global.ll b/test/AddressSpaceCast/to_global.ll new file mode 100644 index 0000000000..12bf8b7e88 --- /dev/null +++ b/test/AddressSpaceCast/to_global.ll @@ -0,0 +1,80 @@ +; RUN: clspv-opt --passes=replace-opencl-builtin %s -o %t.ll +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: call spirv_func ptr addrspace(1) @__to_global +; CHECK: addrspacecast ptr addrspace(4) {{.*}} to ptr addrspace(1) +; CHECK-NOT: call spirv_func ptr addrspace(1) @__to_global + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { <3 x i32> } + +@__push_constants = addrspace(9) global %0 zeroinitializer, !push_constants !0 +@__spirv_GlobalInvocationId = addrspace(5) global <3 x i32> zeroinitializer +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %data, i32 %num) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %j.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %res.0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.inc ] + %cmp = icmp ult i32 %j.0, %num + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %0 = call spir_func ptr addrspace(1) @__to_global(ptr addrspace(4) %data) + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %0, i32 %j.0 + %1 = load float, ptr addrspace(1) %arrayidx, align 4 + %add = fadd float %res.0, %1 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %j.0, 1 + br label %for.cond, !llvm.loop !7 + +for.end: ; preds = %for.cond + ret float %res.0 +} + +declare spir_func ptr addrspace(1) @__to_global(ptr addrspace(4)) + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_kernel void @k(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) #1 !kernel_arg_addr_space !9 !kernel_arg_access_qual !10 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !12 !clspv.pod_args_impl !13 { +entry: + %0 = load i32, ptr addrspace(5) @__spirv_GlobalInvocationId, align 4 + %1 = load i32, ptr addrspace(9) @__push_constants, align 4 + %2 = add i32 %0, %1 + %3 = addrspacecast ptr addrspace(1) %in to ptr addrspace(4) + %call1 = call spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %3, i32 %2) #2 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i32 %2 + store float %call1, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #1 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #2 = { convergent nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} + +!0 = !{i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 2, i32 0} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project be3764fecc263f7180bfada7ac61c5f8d799610e)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 22b564c64b736f5a422b3967720c871c8f9eee9b)"} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.mustprogress"} +!9 = !{i32 1, i32 1} +!10 = !{!"none", !"none"} +!11 = !{!"float*", !"float*"} +!12 = !{!"", !""} +!13 = !{i32 3} diff --git a/test/AddressSpaceCast/to_local.cl b/test/AddressSpaceCast/to_local.cl new file mode 100644 index 0000000000..8bc3008bad --- /dev/null +++ b/test/AddressSpaceCast/to_local.cl @@ -0,0 +1,18 @@ +// RUN: clspv --cl-std=CLC++ --inline-entry-points %s -o %t.spv --show-producer-ir &> %t.ll +// RUN: spirv-val --target-env spv1.0 %t.spv +// RUN: FileCheck %s < %t.ll + +// CHECK-NOT: addrspacecast + +float loop(const float *data, unsigned num) { + float res = 0; + for (unsigned j = 0; j < num; ++j) { + res += to_local(data)[j]; + } + return res; +} + +kernel void k(local float* in, global float* out) { + unsigned index = get_global_id(0); + out[index] = loop(in, index); +} diff --git a/test/AddressSpaceCast/to_local.ll b/test/AddressSpaceCast/to_local.ll new file mode 100644 index 0000000000..e7fa831d15 --- /dev/null +++ b/test/AddressSpaceCast/to_local.ll @@ -0,0 +1,80 @@ +; RUN: clspv-opt --passes=replace-opencl-builtin %s -o %t.ll +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: call spirv_func ptr addrspace(3) @__to_local +; CHECK: addrspacecast ptr addrspace(4) {{.*}} to ptr addrspace(3) +; CHECK-NOT: call spirv_func ptr addrspace(3) @__to_local + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { <3 x i32> } + +@__push_constants = addrspace(9) global %0 zeroinitializer, !push_constants !0 +@__spirv_GlobalInvocationId = addrspace(5) global <3 x i32> zeroinitializer +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %data, i32 %num) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %j.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %res.0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.inc ] + %cmp = icmp ult i32 %j.0, %num + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %0 = call spir_func ptr addrspace(3) @__to_local(ptr addrspace(4) %data) + %arrayidx = getelementptr inbounds float, ptr addrspace(3) %0, i32 %j.0 + %1 = load float, ptr addrspace(3) %arrayidx, align 4 + %add = fadd float %res.0, %1 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %j.0, 1 + br label %for.cond, !llvm.loop !7 + +for.end: ; preds = %for.cond + ret float %res.0 +} + +declare spir_func ptr addrspace(3) @__to_local(ptr addrspace(4)) + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_kernel void @k(ptr addrspace(3) align 4 %in, ptr addrspace(1) align 4 %out) #1 !kernel_arg_addr_space !9 !kernel_arg_access_qual !10 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !12 !clspv.pod_args_impl !13 { +entry: + %0 = load i32, ptr addrspace(5) @__spirv_GlobalInvocationId, align 4 + %1 = load i32, ptr addrspace(9) @__push_constants, align 4 + %2 = add i32 %0, %1 + %3 = addrspacecast ptr addrspace(3) %in to ptr addrspace(4) + %call1 = call spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %3, i32 %2) #2 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i32 %2 + store float %call1, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #1 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #2 = { convergent nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} + +!0 = !{i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 2, i32 0} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project be3764fecc263f7180bfada7ac61c5f8d799610e)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 22b564c64b736f5a422b3967720c871c8f9eee9b)"} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.mustprogress"} +!9 = !{i32 3, i32 1} +!10 = !{!"none", !"none"} +!11 = !{!"float*", !"float*"} +!12 = !{!"", !""} +!13 = !{i32 3} diff --git a/test/AddressSpaceCast/to_private.cl b/test/AddressSpaceCast/to_private.cl new file mode 100644 index 0000000000..a6d5b18dbc --- /dev/null +++ b/test/AddressSpaceCast/to_private.cl @@ -0,0 +1,19 @@ +// RUN: clspv --cl-std=CLC++ --inline-entry-points %s -o %t.spv --show-producer-ir &> %t.ll +// RUN: spirv-val --target-env spv1.0 %t.spv +// RUN: FileCheck %s < %t.ll + +// CHECK-NOT: addrspacecast + +float loop(const float *data, unsigned num) { + float res = 0; + for (unsigned j = 0; j < num; ++j) { + res += to_private(data)[j]; + } + return res; +} + +kernel void k(global float* out) { + float in[128]; + unsigned index = get_global_id(0); + out[index] = loop(in, index); +} diff --git a/test/AddressSpaceCast/to_private.ll b/test/AddressSpaceCast/to_private.ll new file mode 100644 index 0000000000..34bdecad9a --- /dev/null +++ b/test/AddressSpaceCast/to_private.ll @@ -0,0 +1,83 @@ +; RUN: clspv-opt --passes=replace-opencl-builtin %s -o %t.ll +; RUN: FileCheck %s < %t.ll + +; CHECK-NOT: call spirv_func ptr @__to_private +; CHECK: addrspacecast ptr addrspace(4) {{.*}} to ptr +; CHECK-NOT: call spirv_func ptr @__to_private + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +%0 = type { <3 x i32> } + +@__push_constants = addrspace(9) global %0 zeroinitializer, !push_constants !0 +@__spirv_GlobalInvocationId = addrspace(5) global <3 x i32> zeroinitializer +@__spirv_WorkgroupSize = addrspace(8) global <3 x i32> zeroinitializer + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %data, i32 %num) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %j.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %res.0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.inc ] + %cmp = icmp ult i32 %j.0, %num + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %0 = call spir_func ptr @__to_private(ptr addrspace(4) %data) + %arrayidx = getelementptr inbounds float, ptr %0, i32 %j.0 + %1 = load float, ptr %arrayidx, align 4 + %add = fadd float %res.0, %1 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %j.0, 1 + br label %for.cond, !llvm.loop !7 + +for.end: ; preds = %for.cond + ret float %res.0 +} + +declare spir_func ptr @__to_private(ptr addrspace(4)) + +; Function Attrs: convergent mustprogress norecurse nounwind +define dso_local spir_kernel void @k(ptr addrspace(1) align 4 %out) #1 !kernel_arg_addr_space !9 !kernel_arg_access_qual !10 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !12 !clspv.pod_args_impl !13 { +entry: + %in = alloca [128 x float], align 4 + store [128 x float] zeroinitializer, ptr %in, align 4 + %0 = load i32, ptr addrspace(5) @__spirv_GlobalInvocationId, align 4 + %1 = load i32, ptr addrspace(9) @__push_constants, align 4 + %2 = add i32 %0, %1 + %arraydecay = getelementptr inbounds [128 x float], ptr %in, i32 0, i32 0 + %arraydecay.ascast = addrspacecast ptr %arraydecay to ptr addrspace(4) + %call1 = call spir_func float @_Z4loopPU3AS4Kfj(ptr addrspace(4) %arraydecay.ascast, i32 %2) #2 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %out, i32 %2 + store float %call1, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +attributes #0 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" } +attributes #1 = { convergent mustprogress norecurse nounwind "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="0" "stackrealign" "uniform-work-group-size"="true" } +attributes #2 = { convergent nobuiltin nounwind "no-builtins" } + +!llvm.module.flags = !{!1, !2} +!opencl.ocl.version = !{!3} +!opencl.spir.version = !{!3, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} +!llvm.ident = !{!5, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6} + +!0 = !{i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 2, i32 0} +!4 = !{i32 1, i32 2} +!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project be3764fecc263f7180bfada7ac61c5f8d799610e)"} +!6 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project 22b564c64b736f5a422b3967720c871c8f9eee9b)"} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.mustprogress"} +!9 = !{i32 1} +!10 = !{!"none"} +!11 = !{!"float*"} +!12 = !{!""} +!13 = !{i32 3} diff --git a/test/AtomicBuiltins/atomic_flag.cl b/test/AtomicBuiltins/atomic_flag.cl index 868d9a4da5..76e9b7d202 100644 --- a/test/AtomicBuiltins/atomic_flag.cl +++ b/test/AtomicBuiltins/atomic_flag.cl @@ -1,4 +1,4 @@ -// RUN: clspv %s --cl-std=CL3.0 --enable-feature-macros=__opencl_c_atomic_order_seq_cst,__opencl_c_atomic_scope_device -o %t.spv +// RUN: clspv %s --cl-std=CL3.0 --inline-entry-points --enable-feature-macros=__opencl_c_atomic_order_seq_cst,__opencl_c_atomic_scope_device -o %t.spv // RUN: spirv-val --target-env vulkan1.1 %t.spv // RUN: spirv-dis %t.spv | FileCheck %s diff --git a/test/AtomicBuiltins/atomic_flag_errors.cl b/test/AtomicBuiltins/atomic_flag_errors.cl index 08af940245..6c34bd0280 100644 --- a/test/AtomicBuiltins/atomic_flag_errors.cl +++ b/test/AtomicBuiltins/atomic_flag_errors.cl @@ -1,4 +1,4 @@ -// RUN: clspv %s --cl-std=CL3.0 --enable-feature-macros=__opencl_c_atomic_scope_all_devices -o %t.spv -verify +// RUN: clspv %s --cl-std=CL3.0 --inline-entry-points --enable-feature-macros=__opencl_c_atomic_scope_all_devices -o %t.spv -verify kernel void not_constant(global int* out, global atomic_flag *flag, global int *test) { // "All used for Scope and Memory Semantics must be of an OpConstant." diff --git a/test/AtomicBuiltins/atomic_flag_warnings.cl b/test/AtomicBuiltins/atomic_flag_warnings.cl index cc85cbc3f4..e453ccb339 100644 --- a/test/AtomicBuiltins/atomic_flag_warnings.cl +++ b/test/AtomicBuiltins/atomic_flag_warnings.cl @@ -1,4 +1,4 @@ -// RUN: clspv %s --cl-std=CL3.0 --enable-feature-macros=__opencl_c_atomic_order_seq_cst,__opencl_c_atomic_scope_device -o %t.spv -verify +// RUN: clspv %s --cl-std=CL3.0 --inline-entry-points --enable-feature-macros=__opencl_c_atomic_order_seq_cst,__opencl_c_atomic_scope_device -o %t.spv -verify kernel void flag_global(global int *out, global atomic_flag *flag) { //expected-warning@+1{{memory_order_seq_cst is treated as memory_order_acq_rel}} diff --git a/test/AtomicBuiltins/atomic_init.cl b/test/AtomicBuiltins/atomic_init.cl index 066bd05efa..5245bce6fa 100644 --- a/test/AtomicBuiltins/atomic_init.cl +++ b/test/AtomicBuiltins/atomic_init.cl @@ -1,4 +1,4 @@ -// RUN: clspv --cl-std=CL3.0 %s -o %t.spv +// RUN: clspv --cl-std=CL3.0 --inline-entry-points %s -o %t.spv // RUN: spirv-dis -o %t2.spvasm %t.spv // RUN: FileCheck %s < %t2.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv diff --git a/test/AtomicBuiltins/enum_values.cl b/test/AtomicBuiltins/enum_values.cl index 34ee55624e..97e4af4536 100644 --- a/test/AtomicBuiltins/enum_values.cl +++ b/test/AtomicBuiltins/enum_values.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target --enable-feature-macros=__opencl_c_atomic_scope_all_devices,__opencl_c_atomic_order_seq_cst --cl-std=CL3.0 %s -o %t.spv +// RUN: clspv %target --enable-feature-macros=__opencl_c_atomic_scope_all_devices,__opencl_c_atomic_order_seq_cst --cl-std=CL3.0 --inline-entry-points %s -o %t.spv // RUN: spirv-dis %t.spv -o %t.spvasm // RUN: FileCheck %s < %t.spvasm // diff --git a/test/Diagnostics/dse-unsupported-cl3.cl b/test/Diagnostics/dse-unsupported-cl3.cl index c3d1b34729..a760cf86f4 100644 --- a/test/Diagnostics/dse-unsupported-cl3.cl +++ b/test/Diagnostics/dse-unsupported-cl3.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target -cl-std=CL3.0 -verify %s +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points -verify %s kernel void dse(queue_t queue) { //expected-error{{unknown type name 'queue_t'}} ndrange_t ndrange = ndrange_1D(1); //expected-error{{use of undeclared identifier 'ndrange_t'}} diff --git a/test/Diagnostics/pipes-unsupported-cl3.cl b/test/Diagnostics/pipes-unsupported-cl3.cl index 2aec2593f3..a25e4b7e4d 100644 --- a/test/Diagnostics/pipes-unsupported-cl3.cl +++ b/test/Diagnostics/pipes-unsupported-cl3.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target -cl-std=CL3.0 -verify %s +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points -verify %s kernel void pipes(read_only pipe int in, write_only pipe int out) { //expected-error@3{{OpenCL C version 3.0 does not support the 'pipe' type qualifier}} diff --git a/test/Features/cl3-all-features.cl b/test/Features/cl3-all-features.cl index 754356ce48..a12d6802f9 100644 --- a/test/Features/cl3-all-features.cl +++ b/test/Features/cl3-all-features.cl @@ -1,4 +1,4 @@ -// RUN: clspv -cl-std=CL3.0 --enable-feature-macros=__opencl_c_3d_image_writes,__opencl_c_atomic_order_acq_rel,__opencl_c_fp64,__opencl_c_images,__opencl_c_subgroups,__opencl_c_int64,__opencl_c_atomic_order_seq_cst,__opencl_c_read_write_images,__opencl_c_atomic_scope_device,__opencl_c_atomic_scope_all_devices,__opencl_c_work_group_collective_functions %s -verify +// RUN: clspv -cl-std=CL3.0 --inline-entry-points --enable-feature-macros=__opencl_c_3d_image_writes,__opencl_c_atomic_order_acq_rel,__opencl_c_fp64,__opencl_c_images,__opencl_c_subgroups,__opencl_c_int64,__opencl_c_atomic_order_seq_cst,__opencl_c_read_write_images,__opencl_c_atomic_scope_device,__opencl_c_atomic_scope_all_devices,__opencl_c_work_group_collective_functions %s -verify #ifndef __opencl_c_3d_image_writes #error __opencl_c_3d_image_writes should be defined diff --git a/test/Features/cl3-disabled-features.cl b/test/Features/cl3-disabled-features.cl index 78e4a38fff..8eff61818c 100644 --- a/test/Features/cl3-disabled-features.cl +++ b/test/Features/cl3-disabled-features.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target -cl-std=CL3.0 %s -verify +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points %s -verify #ifdef __opencl_c_pipes #error FAIL diff --git a/test/Features/cl3-no-features.cl b/test/Features/cl3-no-features.cl index 74e036249f..5387e5e615 100644 --- a/test/Features/cl3-no-features.cl +++ b/test/Features/cl3-no-features.cl @@ -1,4 +1,4 @@ -// RUN: clspv -fp64=0 -images=0 -cl-std=CL3.0 %s -verify +// RUN: clspv -fp64=0 -images=0 -cl-std=CL3.0 --inline-entry-points %s -verify #ifdef __opencl_c_3d_image_writes #error __opencl_c_3d_image_writes should not be defined diff --git a/test/Features/cl3-some-features.cl b/test/Features/cl3-some-features.cl index 8dce8d92d4..95c9f2226d 100644 --- a/test/Features/cl3-some-features.cl +++ b/test/Features/cl3-some-features.cl @@ -1,4 +1,4 @@ -// RUN: clspv -cl-std=CL3.0 -fp64=0 --enable-feature-macros=__opencl_c_atomic_order_acq_rel,__opencl_c_images,__opencl_c_atomic_order_seq_cst,__opencl_c_int64 %s -verify +// RUN: clspv -cl-std=CL3.0 --inline-entry-points -fp64=0 --enable-feature-macros=__opencl_c_atomic_order_acq_rel,__opencl_c_images,__opencl_c_atomic_order_seq_cst,__opencl_c_int64 %s -verify #ifdef __opencl_c_3d_image_writes #error __opencl_c_3d_image_writes should not be defined diff --git a/test/Features/fp64-default-cl3.cl b/test/Features/fp64-default-cl3.cl index 11812990e0..31060f94b7 100644 --- a/test/Features/fp64-default-cl3.cl +++ b/test/Features/fp64-default-cl3.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target -cl-std=CL3.0 %s -verify +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points %s -verify #ifndef cl_khr_fp64 #error FAIL diff --git a/test/Features/fp64-disabled-cl3.cl b/test/Features/fp64-disabled-cl3.cl index 9b4a00e560..4968b18df3 100644 --- a/test/Features/fp64-disabled-cl3.cl +++ b/test/Features/fp64-disabled-cl3.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target -cl-std=CL3.0 -fp64=0 %s -verify +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points -fp64=0 %s -verify #ifdef cl_khr_fp64 #error FAIL diff --git a/test/SubGroup/get_sub_group_max_size.cl b/test/SubGroup/get_sub_group_max_size.cl index 7c8726a38a..6fdc7c8b0f 100644 --- a/test/SubGroup/get_sub_group_max_size.cl +++ b/test/SubGroup/get_sub_group_max_size.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target %s -cl-std=CL3.0 -spv-version=1.3 -o %t.spv +// RUN: clspv %target %s -cl-std=CL3.0 --inline-entry-points -spv-version=1.3 -o %t.spv // RUN: spirv-dis -o %t2.spvasm %t.spv // RUN: FileCheck %s < %t2.spvasm // RUN: spirv-val --target-env vulkan1.2 %t.spv diff --git a/test/SynchronizationBuiltins/atomic_work_item_fence.cl b/test/SynchronizationBuiltins/atomic_work_item_fence.cl index b3f7ef95f8..e72a8789d2 100644 --- a/test/SynchronizationBuiltins/atomic_work_item_fence.cl +++ b/test/SynchronizationBuiltins/atomic_work_item_fence.cl @@ -1,4 +1,4 @@ -// RUN: clspv %target %s -o %t.spv --cl-std=CL3.0 +// RUN: clspv %target %s -o %t.spv --cl-std=CL3.0 --inline-entry-points // RUN: spirv-dis %t.spv -o %t.spvasm // RUN: FileCheck %s < %t.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv diff --git a/test/opsource.cl b/test/opsource.cl index bab5aab8b5..b934581451 100644 --- a/test/opsource.cl +++ b/test/opsource.cl @@ -23,7 +23,7 @@ // RUN: FileCheck --check-prefix=CHECK20 %s < %t.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv -// RUN: clspv %target -cl-std=CL3.0 %s -o %t.spv +// RUN: clspv %target -cl-std=CL3.0 --inline-entry-points %s -o %t.spv // RUN: spirv-dis -o %t.spvasm %t.spv // RUN: FileCheck --check-prefix=CHECK30 %s < %t.spvasm // RUN: spirv-val --target-env vulkan1.0 %t.spv