commit baf3ece63453adac59c5688930324a78ced5b2e4
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Sat Oct 23 01:26:47 2021 -0400

    x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S
    
    This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'.
    
    It could potentially be dangerous to use SSE2 if this function is
    ever called without 'vzeroupper' being used beforehand. While
    compilers appear to emit 'vzeroupper' before function calls if
    AVX2 has been used, using SSE2 here is more brittle. Since it is
    not absolutely necessary, it should be avoided.
    
    It costs 2 extra bytes, but the extra bytes should only eat into
    alignment padding.
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
    
    (cherry picked from commit bad852b61b79503fcb3c5fc379c70f768df3e1fb)
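Background on the hazard described above: mixing a legacy-SSE encoding into
code whose callers may have left the upper YMM/ZMM state dirty is exactly
what 'vzeroupper' guards against. A minimal sketch of the pattern, with a
hypothetical caller and label names (not part of the patch):

	/* Hypothetical sketch, not from the patch.  */
caller:
	vmovdqu	(%rdi), %ymm0	/* AVX2 use dirties the upper YMM state.  */
	call	tail_compare	/* No vzeroupper before the call.  */
	ret

tail_compare:
	/* Legacy-SSE encoding: leaves the dirty upper bits of %ymm2 in
	   place, which costs either an SSE/AVX state transition or a
	   false dependency, depending on the microarchitecture.  */
	movups	(%rsi), %xmm2
	/* VEX encoding of the same load: zeroes the upper bits of %ymm2,
	   so no transition occurs.  It is one byte longer to encode,
	   which is where the 2 extra bytes (two sites) come from.  */
	vmovdqu	(%rsi), %xmm2
	ret
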
diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
index 2761b54f2e7dea9f..640f6757fac8a356 100644
--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
@@ -561,13 +561,13 @@ L(between_16_31):
 	/* From 16 to 31 bytes.  No branch when size == 16.  */
 
 	/* Use movups to save code size.  */
-	movups	(%rsi), %xmm2
+	vmovdqu	(%rsi), %xmm2
 	VPCMP	$4, (%rdi), %xmm2, %k1
 	kmovd	%k1, %eax
 	testl	%eax, %eax
 	jnz	L(return_vec_0_lv)
 	/* Use overlapping loads to avoid branches.  */
-	movups	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
+	vmovdqu	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
 	VPCMP	$4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
 	addl	$(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
 	kmovd	%k1, %eax
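
For reference, the "overlapping loads" comment kept in the hunk above refers
to handling any size in [16, 31] with two fixed-width 16-byte compares: the
second load starts at size-16, so the two loads overlap and together cover
every byte without branching on the exact length. A simplified byte-wise
AVX2-style sketch of the idea (hypothetical label, no EVEX mask registers;
%rdi/%rsi are the buffers, %rdx the size), not the EVEX code from the patch:

	/* Sketch only: size in %rdx is known to be in [16, 31].  */
	vmovdqu	(%rsi), %xmm2			/* Bytes [0, 16).  */
	vpcmpeqb	(%rdi), %xmm2, %xmm3
	vpmovmskb	%xmm3, %eax
	cmpl	$0xffff, %eax			/* All 16 bytes equal?  */
	jne	L(diff_found)
	vmovdqu	-16(%rsi, %rdx), %xmm2		/* Bytes [size-16, size); overlaps.  */
	vpcmpeqb	-16(%rdi, %rdx), %xmm2, %xmm3
	vpmovmskb	%xmm3, %eax
	cmpl	$0xffff, %eax
	jne	L(diff_found)
	/* Fall through: the buffers are equal.  */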