-
Notifications
You must be signed in to change notification settings - Fork 12
/
memcmp.s
37 lines (34 loc) · 1.54 KB
/
memcmp.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 2020, Georg Sauthoff <mail@gms.tf>, LGPLv3+
.text
# .balign 4
# int memcmp(const void *u, const void *v, size_t n);
#
# Compares the n bytes at u with the n bytes at v, one vector-sized chunk
# per loop iteration.
#
# In:  a0 = u, a1 = v, a2 = n
# Out: a0 = 0 if no differing byte was found; otherwise the difference
#      u[i] - v[i] of the first differing byte pair, both bytes taken as
#      unsigned (so the result is negative when u[i] < v[i])
#
# Scalar registers written: a0, a1, a2, a3, a4, a5, t0
# Vector registers written: v0, v8, v16, v24 (as register groups while m8
#                           is in effect); vl/vtype are changed by vsetvli
# No stack usage, no calls.
#
# NOTE(review): vlb.v looks like a mnemonic from a pre-1.0 draft of the
# vector extension - confirm the targeted toolchain/spec version.
#
.global memcmp
memcmp:
# Loop head: every iteration (re-)enters here, so the vector configuration
# is set up again for the bytes that remain.
vsetvli a3, a2, e8, m8 # a3 = bytes handled in this iteration (from a2);
# 8 bit elements, registers grouped by 8
vlb.v v8, (a0) # load a3 bytes from u
vlb.v v16, (a1) # load a3 bytes from v
vmsne.vv v0, v8, v16 # set mask bit where the bytes differ
vfirst.m a4, v0 # a4 = lowest index of a set mask bit, negative if none
bgez a4, 1f # a4 >= 0: this chunk contains a difference
add a0, a0, a3 # advance u past the compared chunk
add a1, a1, a3 # advance v past the compared chunk
sub a2, a2, a3 # a2 = bytes still to compare
bnez a2, memcmp # bytes left: continue with the next chunk
li a0, 0 # all n bytes were equal: return 0
ret
1:
# Difference found. a4 is the index of the first differing byte inside the
# current chunk; v8/v16 still hold that chunk of u/v. The mask in v0 is no
# longer needed and gets overwritten.
vslidedown.vx v0, v8, a4 # move the differing byte of u to element 0
vslidedown.vx v24, v16, a4 # move the differing byte of v to element 0
li a5, 1 # requested vector length for the ops below: 1 element
vsetvli t0, a5, e8, m1 # ungrouped registers for the widening op
# (with m8 it would yield an illegal instr.);
# the value written to t0 is not used
vwsubu.vv v8, v0, v24 # widening unsigned subtract: operands are
# zero-extended, v8 = u byte - v byte (16 bit)
vsetvli t0, a5, e16, m1 # switch to 16 bit elements to read the widened result
vmv.x.s a0, v8 # a0 = element 0 of v8, sign-extended,
# i.e. the signed byte difference
ret