From e7e0ac928b21e5f47e5b648723851c5270db24f2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Sat, 23 Oct 2021 01:26:47 -0400
Subject: [PATCH] x86: Replace sse2 instructions with avx in
 memcmp-evex-movbe.S

This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'.

It could be dangerous to use SSE2 if this function is ever called
without 'vzeroupper' having been issued beforehand. While compilers
appear to emit 'vzeroupper' before function calls if AVX2 has been
used, relying on that makes the SSE2 usage here brittle. Since it is
not absolutely necessary, it should be avoided.
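
As an illustration (a minimal sketch, not part of this patch) of the
hazard being avoided: if a caller dirties the upper YMM state and
omits 'vzeroupper', a later legacy-SSE instruction can hit the
SSE/AVX transition penalty on many microarchitectures, e.g.:

	vmovdqu	(%rdi), %ymm0	/* 256-bit AVX: upper state now dirty.  */
	/* ... execution reaches this function without vzeroupper ...  */
	movups	(%rsi), %xmm2	/* Legacy-SSE encoding: transition
				   penalty / false dependency.  */

The VEX-encoded 'vmovdqu' is penalty-free here because VEX-encoded
instructions do not trigger the legacy-SSE transition.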

It costs 2 extra bytes, but the extra bytes should only eat into
alignment padding.
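
For reference (encodings per the Intel SDM; the second, overlapping
load behaves the same way):

	movups	(%rsi), %xmm2	/* 0F 10 16: 3 bytes.  */
	vmovdqu	(%rsi), %xmm2	/* C5 FA 6F 16: 4 bytes.  */

One extra byte for each of the two converted loads, 2 bytes total.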

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

(cherry picked from commit bad852b61b79503fcb3c5fc379c70f768df3e1fb)
---
 sysdeps/x86_64/multiarch/memcmp-evex-movbe.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
index 2761b54f..640f6757 100644
--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
@@ -561,13 +561,13 @@ L(between_16_31):
 	/* From 16 to 31 bytes.  No branch when size == 16.  */
 
 	/* Use movups to save code size.  */
-	movups	(%rsi), %xmm2
+	vmovdqu	(%rsi), %xmm2
 	VPCMP	$4, (%rdi), %xmm2, %k1
 	kmovd	%k1, %eax
 	testl	%eax, %eax
 	jnz	L(return_vec_0_lv)
 	/* Use overlapping loads to avoid branches.  */
-	movups	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
+	vmovdqu	-16(%rsi, %rdx, CHAR_SIZE), %xmm2
 	VPCMP	$4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
 	addl	$(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
 	kmovd	%k1, %eax
-- 
GitLab