| From 4412c21a9027bbe6546b2a329a741d26c2477136 Mon Sep 17 00:00:00 2001 |
| From: Noah Goldstein <goldstein.w.n@gmail.com> |
| Date: Mon, 7 Feb 2022 00:32:23 -0600 |
| Subject: [PATCH] x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 |
| Only) |
| |
| commit b62ace2740a106222e124cc86956448fa07abf4d |
| Author: Noah Goldstein <goldstein.w.n@gmail.com> |
| Date: Sun Feb 6 00:54:18 2022 -0600 |
| |
| x86: Improve vec generation in memset-vec-unaligned-erms.S |
| |
| Revert usage of 'pshufb' in broadcast logic as it is an SSSE3 |
| instruction and memset.S is restricted to only SSE2 instructions. |
| |
| (cherry picked from commit 1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1) |
| |
| sysdeps/x86_64/memset.S | 7 ++++--- |
| 1 file changed, 4 insertions(+), 3 deletions(-) |
| |
| diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S |
| index 27debd2b..4cb4aa71 100644 |
| |
| |
| @@ -30,9 +30,10 @@ |
| |
| # define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ |
| movd d, %xmm0; \ |
| - pxor %xmm1, %xmm1; \ |
| - pshufb %xmm1, %xmm0; \ |
| - movq r, %rax |
| + movq r, %rax; \ |
| + punpcklbw %xmm0, %xmm0; \ |
| + punpcklwd %xmm0, %xmm0; \ |
| + pshufd $0, %xmm0, %xmm0 |
| |
| # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \ |
| movd d, %xmm0; \ |
| -- |
| GitLab |
| |