| From e69d9981f858a38e19304e6ff5ebdf89f2cb0ba0 Mon Sep 17 00:00:00 2001 |
| From: Wilco Dijkstra <wdijkstr@arm.com> |
| Date: Tue, 10 Aug 2021 13:44:27 +0100 |
| Subject: [PATCH] [4/5] AArch64: Improve A64FX memset by removing unroll32 |
| |
| Remove unroll32 code since it doesn't improve performance. |
| |
| Reviewed-by: Naohiro Tamura <naohirot@fujitsu.com> |
| |
| sysdeps/aarch64/multiarch/memset_a64fx.S | 18 +----------------- |
| 1 file changed, 1 insertion(+), 17 deletions(-) |
| |
| diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S |
| index 337c86be6f..ef0315658a 100644 |
| |
| |
| @@ -102,22 +102,6 @@ L(vl_agnostic): // VL Agnostic |
| ccmp vector_length, tmp1, 0, cs |
| b.eq L(L1_prefetch) |
| |
| -L(unroll32): |
| - lsl tmp1, vector_length, 3 // vector_length * 8 |
| - lsl tmp2, vector_length, 5 // vector_length * 32 |
| - .p2align 3 |
| -1: cmp rest, tmp2 |
| - b.cc L(unroll8) |
| - st1b_unroll |
| - add dst, dst, tmp1 |
| - st1b_unroll |
| - add dst, dst, tmp1 |
| - st1b_unroll |
| - add dst, dst, tmp1 |
| - st1b_unroll |
| - add dst, dst, tmp1 |
| - sub rest, rest, tmp2 |
| - b 1b |
| |
| L(unroll8): |
| lsl tmp1, vector_length, 3 |
| @@ -155,7 +139,7 @@ L(L1_prefetch): // if rest >= L1_SIZE |
| sub rest, rest, CACHE_LINE_SIZE * 2 |
| cmp rest, L1_SIZE |
| b.ge 1b |
| - cbnz rest, L(unroll32) |
| + cbnz rest, L(unroll8) |
| ret |
| |
| // count >= L2_SIZE |
| -- |
| 2.31.1 |
| |