08c3a6
commit 190ea5f7e4e7e98b9b6e3f29835ae8b1f6a5442e
08c3a6
Author: Noah Goldstein <goldstein.w.n@gmail.com>
08c3a6
Date:   Mon Feb 7 00:32:23 2022 -0600
08c3a6
08c3a6
    x86: Remove SSSE3 instruction for broadcast in memset.S (SSE2 Only)
08c3a6
    
08c3a6
    commit b62ace2740a106222e124cc86956448fa07abf4d
08c3a6
    Author: Noah Goldstein <goldstein.w.n@gmail.com>
08c3a6
    Date:   Sun Feb 6 00:54:18 2022 -0600
08c3a6
    
08c3a6
        x86: Improve vec generation in memset-vec-unaligned-erms.S
08c3a6
    
08c3a6
    Revert usage of 'pshufb' in broadcast logic as it is an SSSE3
08c3a6
    instruction and memset.S is restricted to only SSE2 instructions.
08c3a6
    
08c3a6
    (cherry picked from commit 1b0c60f95bbe2eded80b2bb5be75c0e45b11cde1)
08c3a6
08c3a6
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
08c3a6
index 34ee0bfdcb81fb39..954471e5a5bf225b 100644
08c3a6
--- a/sysdeps/x86_64/memset.S
08c3a6
+++ b/sysdeps/x86_64/memset.S
08c3a6
@@ -30,9 +30,10 @@
08c3a6
 
08c3a6
 # define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
08c3a6
   movd d, %xmm0; \
08c3a6
-  pxor %xmm1, %xmm1; \
08c3a6
-  pshufb %xmm1, %xmm0; \
08c3a6
-  movq r, %rax
08c3a6
+  movq r, %rax; \
08c3a6
+  punpcklbw %xmm0, %xmm0; \
08c3a6
+  punpcklwd %xmm0, %xmm0; \
08c3a6
+  pshufd $0, %xmm0, %xmm0
08c3a6
 
08c3a6
 # define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
08c3a6
   movd d, %xmm0; \