From 50a1ab12abbe948e6d3f8418f11bfa1951c8d19e Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 23 Sep 2024 16:05:55 +0100 Subject: [PATCH] [LAA] Don't assume libcalls with output/input pointers can be vectorized (#108980) LoopAccessAnalysis currently does not check/track aliasing from the output pointers, but assumes vectorizing library calls with a mapping is safe. This can result in incorrect codegen if something like the following is vectorized: ``` for(int i=0; i= 2 `cos_out[1]` to `cos_out[VF-1]` is the cosine of the original value of `cos_out[0]` not the updated value. --- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 17 +- .../LoopVectorize/AArch64/scalable-call.ll | 6 +- .../AArch64/veclib-function-calls.ll | 93 ++++++----- .../AArch64/vector-call-linear-args.ll | 151 ++++-------------- 4 files changed, 92 insertions(+), 175 deletions(-) diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 980f142f113265..3f189724763d47 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2449,13 +2449,20 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, continue; // If this is a load, save it. If this instruction can read from memory - // but is not a load, then we quit. Notice that we don't handle function - // calls that read or write. + // but is not a load, we only allow it if it's a call to a function with a + // vector mapping and no pointer arguments. if (I.mayReadFromMemory()) { - // If the function has an explicit vectorized counterpart, we can safely - // assume that it can be vectorized. + auto hasPointerArgs = [](CallBase *CB) { + return any_of(CB->args(), [](Value const *Arg) { + return Arg->getType()->isPointerTy(); + }); + }; + + // If the function has an explicit vectorized counterpart, and does not + // take output/input pointers, we can safely assume that it can be + // vectorized. if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && - !VFDatabase::getMappings(*Call).empty()) + !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty()) continue; auto *Ld = dyn_cast(&I); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll index 498f2059ffb0c4..7797c0bce0baa4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll @@ -54,9 +54,9 @@ for.end: ; preds = %for.body, %entry define void @vec_ptr(i64 %N, ptr noalias %a, ptr readnone %b) { ; CHECK-LABEL: @vec_ptr -; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , ptr -; CHECK: call @bar_vec( %[[LOAD]]) +; CHECK: for.body: +; CHECK: %[[LOAD:.*]] = load ptr, ptr +; CHECK: call i64 @bar(ptr %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 br i1 %cmp7, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index d9cc630482fc80..41ccb3c404dd76 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -2902,35 +2902,36 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +; FIXME: Re-enable modf[f] vectorization once aliasing issues due to output +; pointers have been resolved. + declare double @modf(double, ptr) declare float @modff(float, ptr) define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @modf_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; SLEEF-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; SLEEF-SVE-LABEL: define void @modf_f64 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SLEEF-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @modf_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP17:%.*]] = call @_ZGVsNxvl8_modf( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) -; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]] +; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-NEON-LABEL: define void @modf_f64 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; ARMPL-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-SVE-LABEL: define void @modf_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]] +; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; entry: br label %for.body @@ -2954,29 +2955,27 @@ for.cond.cleanup: define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @modf_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; SLEEF-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; SLEEF-SVE-LABEL: define void @modf_f32 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; SLEEF-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @modf_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP17:%.*]] = call @_ZGVsNxvl4_modff( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) -; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]] +; SLEEF-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-NEON-LABEL: define void @modf_f32 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; ARMPL-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-SVE-LABEL: define void @modf_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]] +; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; entry: br label %for.body @@ -3276,35 +3275,36 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ret void } +; FIXME: Re-enable sincos[f] vectorization once aliasing issues with output +; pointers have been resolved. + declare void @sincos(double, ptr, ptr) declare void @sincosf(float, ptr, ptr) define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @sincos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; SLEEF-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sincos_f64 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR6:[0-9]+]] +; SLEEF-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @sincos_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: call void @_ZGVsNxvl8l8_sincos( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) -; SLEEF-SVE-NOPRED: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]] +; SLEEF-SVE-NOPRED: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sincos_f64 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-LABEL: define void @sincos_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincos_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_svsincos_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]] +; ARMPL-SVE-NOPRED: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; entry: br label %for.body @@ -3327,29 +3327,27 @@ for.cond.cleanup: define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @sincos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; SLEEF-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sincos_f32 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR7:[0-9]+]] +; SLEEF-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @sincos_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: call void @_ZGVsNxvl4l4_sincosf( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) -; SLEEF-SVE-NOPRED: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]] +; SLEEF-SVE-NOPRED: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sincos_f32 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-LABEL: define void @sincos_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincos_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_svsincos_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]] +; ARMPL-SVE-NOPRED: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; entry: br label %for.body @@ -3369,35 +3367,36 @@ for.cond.cleanup: ret void } +; FIXME: Re-enable sincospi[f] vectorization once aliasing issues with output +; pointers have been resolved. + declare void @sincospi(double, ptr, ptr) declare void @sincospif(float, ptr, ptr) define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @sincospi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; SLEEF-NEON: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sincospi_f64 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR8:[0-9]+]] +; SLEEF-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @sincospi_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: call void @_ZGVsNxvl8l8_sincospi( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) -; SLEEF-SVE-NOPRED: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]] +; SLEEF-SVE-NOPRED: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sincospi_f64 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-NEON: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-LABEL: define void @sincospi_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincospi_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_svsincospi_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]] +; ARMPL-SVE-NOPRED: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; entry: br label %for.body @@ -3420,29 +3419,27 @@ for.cond.cleanup: define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; SLEEF-NEON-LABEL: define void @sincospi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; SLEEF-NEON: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-LABEL: define void @sincospi_f32 ; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR9:[0-9]+]] +; SLEEF-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; SLEEF-SVE-NOPRED-LABEL: define void @sincospi_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: call void @_ZGVsNxvl4l4_sincospif( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) -; SLEEF-SVE-NOPRED: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]] +; SLEEF-SVE-NOPRED: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sincospi_f32 ; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-NEON: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-NEON: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-LABEL: define void @sincospi_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @armpl_svsincospi_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ARMPL-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_svsincospi_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]] +; ARMPL-SVE-NOPRED: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index f60ab5e848dd3a..29904a7822131b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -13,48 +13,27 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { -; NEON: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1:%.*]], i32 0 -; NEON: [[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP5:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]]) -; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 -; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_foo_linear8_nomask_sve(ptr [[TMP14]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_OR_NEON_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF: [[TMP20:%.*]] = extractelement [[TMP19:%.*]], i32 0 -; SVE_TF: [[TMP21:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP20]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF: [[TMP25:%.*]] = extractelement [[TMP24:%.*]], i32 0 -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; entry: br label %for.body @@ -76,35 +55,27 @@ for.cond.cleanup: define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_vector_linear4 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { -; NEON: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0 -; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4]]) ; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_vector_linear4 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP9:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) -; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP7:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP15:%.*]], i32 0 -; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_baz_vector_linear4_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_vector_linear4 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_vector_linear4 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; entry: br label %for.body @@ -132,9 +103,7 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP4:%.*]] = call i64 @foo(ptr [[TMP2:%.*]]) #[[ATTR2:[0-9]+]] -; NEON_INTERLEAVE: [[TMP5:%.*]] = call i64 @foo(ptr [[TMP3:%.*]]) #[[ATTR2]] -; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] +; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { @@ -142,15 +111,15 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_bad_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body @@ -172,35 +141,27 @@ for.cond.cleanup: define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear16_wide_stride ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { -; NEON: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0 -; NEON: [[TMP4:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP3]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP4:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]]) -; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP5:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP9:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP8]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP15:%.*]] = extractelement [[TMP14:%.*]], i32 0 -; SVE_OR_NEON: [[TMP16:%.*]] = call @vec_foo_linear16_nomask_sve(ptr [[TMP15]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] ; ; SVE_TF-LABEL: define void @test_linear16_wide_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] ; entry: br label %for.body @@ -223,57 +184,27 @@ for.cond.cleanup: define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_linear4_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { -; NEON: [[TMP3:%.*]] = extractelement <4 x ptr> [[TMP1:%.*]], i32 0 -; NEON: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0 -; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP3]], ptr [[TMP4]]) ; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP7:%.*]] = extractelement <4 x ptr> [[TMP4:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP8:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]]) -; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP5:%.*]], i32 0 -; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP9]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP15:%.*]] = extractelement [[TMP13:%.*]], i32 0 -; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP14:%.*]], i32 0 -; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP15]], ptr [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear4_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[TMP21:%.*]] = extractelement [[TMP19:%.*]], i32 0 -; SVE_TF: [[TMP22:%.*]] = extractelement [[TMP20:%.*]], i32 0 -; SVE_TF: [[TMP23:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP21]], ptr [[TMP22]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF: [[TMP27:%.*]] = extractelement [[TMP26:%.*]], i32 0 -; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: br label %for.body @@ -310,21 +241,21 @@ define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 -; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP14]]) +; SVE_OR_NEON: [[TMP13:%.*]] = extractelement [[TMP12:%.*]], i32 0 +; SVE_OR_NEON: [[TMP14:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP13]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear3_non_ptr ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear3_non_ptr ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] ; entry: br label %for.body @@ -361,21 +292,21 @@ define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 -; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP14]]) +; SVE_OR_NEON: [[TMP13:%.*]] = extractelement [[TMP12:%.*]], i32 0 +; SVE_OR_NEON: [[TMP14:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP13]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] +; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] ; entry: br label %for.body @@ -398,48 +329,27 @@ for.cond.cleanup: define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) { ; NEON-LABEL: define void @test_linear8_return_void ; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { -; NEON: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0 -; NEON: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP4]]) ; NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void ; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { -; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6:%.*]], i32 0 -; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) -; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP7:%.*]], i32 0 -; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP9]]) ; NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8_return_void ; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP15:%.*]], i32 0 -; SVE_OR_NEON: call void @vec_goo_linear8_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16]]) ; SVE_OR_NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 -; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] +; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_return_void ; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[TMP22:%.*]] = extractelement [[TMP21:%.*]], i32 0 -; SVE_TF: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF: [[TMP24:%.*]] = extractelement [[TMP23:%.*]], i32 0 -; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] +; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_return_void ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 -; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 -; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 -; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] +; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]] ; entry: br label %for.body @@ -458,6 +368,9 @@ for.cond.cleanup: ret void } +; Note: Vectorizing pointer arguments is currently disabled as LAA cannot detect +; aliasing from output/input pointers. + declare i64 @foo(ptr) declare i32 @baz(i32, ptr) declare i32 @quux(ptr, ptr)