commit baf3ece63453adac59c5688930324a78ced5b2e4
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Sat Oct 23 01:26:47 2021 -0400
    x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S
    
    This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'.
    
    It could be dangerous to use SSE2 if this function is ever called
    without 'vzeroupper' having been executed beforehand.  While
    compilers appear to emit 'vzeroupper' before function calls if AVX2
    has been used, using SSE2 here is more brittle.  Since it is not
    absolutely necessary, it should be avoided.
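    
    As an illustrative sketch (not part of the patch), the hazard is a
    caller that has dirtied the upper YMM state and then calls this
    function without 'vzeroupper'; the registers and operands below are
    hypothetical:
    
        vmovdqu	(%rax), %ymm0	/* AVX use dirties the upper YMM state.  */
        vpaddd	(%rbx), %ymm0, %ymm0
        /* Missing 'vzeroupper' here.  */
        call	memcmp		/* A legacy-SSE 'movups' in the callee may
        			   then hit the SSE/AVX transition penalty;
        			   VEX-encoded 'vmovdqu' avoids it.  */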
    
    It costs 2 extra bytes, but the extra bytes should only eat into
    alignment padding.
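    
    As a quick encoding check (illustrative, not from the patch): the
    legacy-SSE 'movups' load is a bare two-byte escape plus ModRM
    (0F 10 /r), while the VEX-encoded 'vmovdqu' needs a two-byte VEX
    prefix plus opcode and ModRM (C5 FA 6F /r), one byte more per
    instruction, so the two converted loads add 2 bytes in total.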
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
    
    (cherry picked from commit bad852b61b79503fcb3c5fc379c70f768df3e1fb)
diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
index 2761b54f2e7dea9f..640f6757fac8a356 100644
--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
@@ -561,13 +561,13 @@ L(between_16_31):
 	/* From 16 to 31 bytes.  No branch when size == 16.  */
 
 	/* Use movups to save code size.  */
-	movups	(%rsi), %xmm2
+	vmovdqu	(%rsi), %xmm2
 	VPCMP	$4, (%rdi), %xmm2, %k1
 	kmovd	%k1, %eax
 	testl	%eax, %eax
 	jnz	L(return_vec_0_lv)
 	/* Use overlapping loads to avoid branches.  */
-	movups	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
+	vmovdqu	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
 	VPCMP	$4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
 	addl	$(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
 	kmovd	%k1, %eax