From 7d4f7c724bc40e822bc6f2f75ccd013df66ecb39 Mon Sep 17 00:00:00 2001
From: Olaf Bernstein
Date: Sat, 17 Aug 2024 15:45:17 +0200
Subject: [PATCH] add vrev8.v byteswap

---
/*
 * Reviewer notes. This region (after the three-dash marker, before the
 * first "diff --git" header) is commentary only; it is not part of either
 * hunk below.
 *
 * NOTE(review): line breaks and indentation in this patch were reconstructed
 * from a whitespace-collapsed copy -- verify against the original commit
 * before applying.
 *
 * bench/byteswap.S
 *   Adds the routine MX(byteswap32_rvv_vrev8_), assembled only when MX is
 *   defined and __riscv_zvbb is non-zero. Register roles presumably follow
 *   the existing "# a0 = ptr, a1 = len" comment visible as hunk context
 *   (TODO confirm it is meant to cover this routine as well).
 *   Each pass of the loop at label 1:
 *     - vsetvli requests a1 elements of width e32 at the LMUL produced by
 *       MX() and receives the granted element count in t0;
 *     - the elements at (a0) are loaded into v0, vrev8.v (the byte-reverse
 *       instruction named in the subject line) writes its result to v8,
 *       and v8 is stored back to the same address, i.e. in place;
 *     - a1 is reduced by t0 and a0 is advanced by t0 << 2 bytes
 *       (4 bytes per 32-bit element);
 *     - the loop repeats while a1 is non-zero.
 *
 * bench/byteswap.c
 *   IMPLS_ZVBB(f) expands to MX(f,rvv_vrev8) when __riscv_zvbb is non-zero
 *   and to nothing otherwise. IMPLS(f) now invokes it between REV8(f) and
 *   IMPLS_RVV(f).
 */
 bench/byteswap.S | 16 ++++++++++++++++
 bench/byteswap.c |  8 ++++++++
 2 files changed, 24 insertions(+)

diff --git a/bench/byteswap.S b/bench/byteswap.S
index 367cc1f..18a505c 100644
--- a/bench/byteswap.S
+++ b/bench/byteswap.S
@@ -1,4 +1,20 @@
 #if HAS_RVV_1_0
+
+#if defined(MX) && __riscv_zvbb
+.global MX(byteswap32_rvv_vrev8_)
+MX(byteswap32_rvv_vrev8_):
+1:
+	vsetvli t0, a1, e32, MX(), ta, ma
+	vle32.v v0, (a0)
+	vrev8.v v8, v0
+	vse32.v v8, (a0)
+	sub a1, a1, t0
+	slli t1, t0, 2
+	add a0, a0, t1
+	bnez a1, 1b
+	ret
+#endif
+
 #if MX_N == 4 || MX_N == 2 || MX_N == 1
 
 # a0 = ptr, a1 = len
diff --git a/bench/byteswap.c b/bench/byteswap.c
index ee98d94..75ca715 100644
--- a/bench/byteswap.c
+++ b/bench/byteswap.c
@@ -53,10 +53,18 @@ byteswap32_SWAR_rev8(uint32_t *ptr, size_t n)
 	f(rvv_m1_gatherei16s_m8)
 #endif
 
+#if __riscv_zvbb
+#define IMPLS_ZVBB(f) MX(f,rvv_vrev8)
+#else
+#define IMPLS_ZVBB(f)
+#endif
+
+
 #define IMPLS(f) \
 	f(scalar) \
 	f(scalar_autovec) \
 	REV8(f) \
+	IMPLS_ZVBB(f) \
 	IMPLS_RVV(f)
 
 typedef void Func(uint32_t *ptr, size_t n);