From e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Tue, 10 Aug 2021 13:44:27 +0100 Subject: [PATCH] [4/5] AArch64: Improve A64FX memset by removing unroll32 Remove unroll32 code since it doesn't improve performance. Reviewed-by: Naohiro Tamura --- sysdeps/aarch64/multiarch/memset_a64fx.S | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S index 337c86be6f..ef0315658a 100644 --- a/sysdeps/aarch64/multiarch/memset_a64fx.S +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S @@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic ccmp vector_length, tmp1, 0, cs b.eq L(L1_prefetch) -L(unroll32): - lsl tmp1, vector_length, 3 // vector_length * 8 - lsl tmp2, vector_length, 5 // vector_length * 32 - .p2align 3 -1: cmp rest, tmp2 - b.cc L(unroll8) - st1b_unroll - add dst, dst, tmp1 - st1b_unroll - add dst, dst, tmp1 - st1b_unroll - add dst, dst, tmp1 - st1b_unroll - add dst, dst, tmp1 - sub rest, rest, tmp2 - b 1b L(unroll8): lsl tmp1, vector_length, 3 @@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE sub rest, rest, CACHE_LINE_SIZE * 2 cmp rest, L1_SIZE b.ge 1b - cbnz rest, L(unroll32) + cbnz rest, L(unroll8) ret // count >= L2_SIZE -- 2.31.1