From d8d5c44ed7636fdd2b736e152f8207ca063da386 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 9 Jun 2021 16:25:32 -0400
Subject: [PATCH] x86: Fix overflow bug with wmemchr-sse2 and wmemchr-avx2 [BZ
 #27974]

This commit fixes the bug mentioned in the previous commit.

The previous implementations of wmemchr in these files relied
on n * sizeof(wchar_t) which was not guaranteed by the standard.

The new overflow tests added in the previous commit now
pass (As well as all the other tests).

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 645a158978f9520e74074e8c14047503be4db0f0)
---
 sysdeps/x86_64/memchr.S                | 77 +++++++++++++++++++-------
 sysdeps/x86_64/multiarch/memchr-avx2.S | 58 +++++++++++++------
 2 files changed, 98 insertions(+), 37 deletions(-)

diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index cb320257..24f9a0c5 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -21,9 +21,11 @@
 #ifdef USE_AS_WMEMCHR
 # define MEMCHR		wmemchr
 # define PCMPEQ		pcmpeqd
+# define CHAR_PER_VEC	4
 #else
 # define MEMCHR		memchr
 # define PCMPEQ		pcmpeqb
+# define CHAR_PER_VEC	16
 #endif
 
 /* fast SSE2 version with using pmaxub and 64 byte loop */
@@ -33,15 +35,14 @@ ENTRY(MEMCHR)
 	movd	%esi, %xmm1
 	mov	%edi, %ecx
 
+#ifdef __ILP32__
+	/* Clear the upper 32 bits.  */
+	movl	%edx, %edx
+#endif
 #ifdef USE_AS_WMEMCHR
 	test	%RDX_LP, %RDX_LP
 	jz	L(return_null)
-	shl	$2, %RDX_LP
 #else
-# ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%edx, %edx
-# endif
 	punpcklbw %xmm1, %xmm1
 	test	%RDX_LP, %RDX_LP
 	jz	L(return_null)
@@ -60,13 +61,16 @@ ENTRY(MEMCHR)
 	test	%eax, %eax
 
 	jnz	L(matches_1)
-	sub	$16, %rdx
+	sub	$CHAR_PER_VEC, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
 	and	$15, %ecx
 	and	$-16, %rdi
+#ifdef USE_AS_WMEMCHR
+	shr	$2, %ecx
+#endif
 	add	%rcx, %rdx
-	sub	$64, %rdx
+	sub	$(CHAR_PER_VEC * 4), %rdx
 	jbe	L(exit_loop)
 	jmp	L(loop_prolog)
 
@@ -77,16 +81,21 @@ L(crosscache):
 	movdqa	(%rdi), %xmm0
 
 	PCMPEQ	%xmm1, %xmm0
-/* Check if there is a match.  */
+	/* Check if there is a match.  */
 	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
+	/* Remove the leading bytes.  */
 	sar	%cl, %eax
 	test	%eax, %eax
 	je	L(unaligned_no_match)
-/* Check which byte is a match.  */
+	/* Check which byte is a match.  */
 	bsf	%eax, %eax
-
+#ifdef USE_AS_WMEMCHR
+	mov	%eax, %esi
+	shr	$2, %esi
+	sub	%rsi, %rdx
+#else
 	sub	%rax, %rdx
+#endif
 	jbe	L(return_null)
 	add	%rdi, %rax
 	add	%rcx, %rax
@@ -94,15 +103,18 @@ L(crosscache):
 
 	.p2align 4
 L(unaligned_no_match):
-        /* "rcx" is less than 16.  Calculate "rdx + rcx - 16" by using
+	/* "rcx" is less than 16.  Calculate "rdx + rcx - 16" by using
 	   "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
 	   possible addition overflow.  */
 	neg	%rcx
 	add	$16, %rcx
+#ifdef USE_AS_WMEMCHR
+	shr	$2, %ecx
+#endif
 	sub	%rcx, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
-	sub	$64, %rdx
+	sub	$(CHAR_PER_VEC * 4), %rdx
 	jbe	L(exit_loop)
 
 	.p2align 4
@@ -135,7 +147,7 @@ L(loop_prolog):
 	test	$0x3f, %rdi
 	jz	L(align64_loop)
 
-	sub	$64, %rdx
+	sub	$(CHAR_PER_VEC * 4), %rdx
 	jbe	L(exit_loop)
 
 	movdqa	(%rdi), %xmm0
@@ -167,11 +179,14 @@ L(loop_prolog):
 	mov	%rdi, %rcx
 	and	$-64, %rdi
 	and	$63, %ecx
+#ifdef USE_AS_WMEMCHR
+	shr	$2, %ecx
+#endif
 	add	%rcx, %rdx
 
 	.p2align 4
 L(align64_loop):
-	sub	$64, %rdx
+	sub	$(CHAR_PER_VEC * 4), %rdx
 	jbe	L(exit_loop)
 	movdqa	(%rdi), %xmm0
 	movdqa	16(%rdi), %xmm2
@@ -218,7 +233,7 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$32, %edx
+	add	$(CHAR_PER_VEC * 2), %edx
 	jle	L(exit_loop_32)
 
 	movdqa	(%rdi), %xmm0
@@ -238,7 +253,7 @@ L(exit_loop):
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32_1)
-	sub	$16, %edx
+	sub	$CHAR_PER_VEC, %edx
 	jle	L(return_null)
 
 	PCMPEQ	48(%rdi), %xmm1
@@ -250,13 +265,13 @@ L(exit_loop):
 
 	.p2align 4
 L(exit_loop_32):
-	add	$32, %edx
+	add	$(CHAR_PER_VEC * 2), %edx
 	movdqa	(%rdi), %xmm0
 	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches_1)
-	sub	$16, %edx
+	sub	$CHAR_PER_VEC, %edx
 	jbe	L(return_null)
 
 	PCMPEQ	16(%rdi), %xmm1
@@ -293,7 +308,13 @@ L(matches32):
 	.p2align 4
 L(matches_1):
 	bsf	%eax, %eax
+#ifdef USE_AS_WMEMCHR
+	mov	%eax, %esi
+	shr	$2, %esi
+	sub	%rsi, %rdx
+#else
 	sub	%rax, %rdx
+#endif
 	jbe	L(return_null)
 	add	%rdi, %rax
 	ret
@@ -301,7 +322,13 @@ L(matches_1):
 	.p2align 4
 L(matches16_1):
 	bsf	%eax, %eax
+#ifdef USE_AS_WMEMCHR
+	mov	%eax, %esi
+	shr	$2, %esi
+	sub	%rsi, %rdx
+#else
 	sub	%rax, %rdx
+#endif
 	jbe	L(return_null)
 	lea	16(%rdi, %rax), %rax
 	ret
@@ -309,7 +336,13 @@ L(matches16_1):
 	.p2align 4
 L(matches32_1):
 	bsf	%eax, %eax
+#ifdef USE_AS_WMEMCHR
+	mov	%eax, %esi
+	shr	$2, %esi
+	sub	%rsi, %rdx
+#else
 	sub	%rax, %rdx
+#endif
 	jbe	L(return_null)
 	lea	32(%rdi, %rax), %rax
 	ret
@@ -317,7 +350,13 @@ L(matches32_1):
 	.p2align 4
 L(matches48_1):
 	bsf	%eax, %eax
+#ifdef USE_AS_WMEMCHR
+	mov	%eax, %esi
+	shr	$2, %esi
+	sub	%rsi, %rdx
+#else
 	sub	%rax, %rdx
+#endif
 	jbe	L(return_null)
 	lea	48(%rdi, %rax), %rax
 	ret
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index b377f22e..16027abb 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -54,21 +54,19 @@
 
 # define VEC_SIZE 32
 # define PAGE_SIZE 4096
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
 
 	.section SECTION(.text),"ax",@progbits
 ENTRY (MEMCHR)
 # ifndef USE_AS_RAWMEMCHR
 	/* Check for zero length.  */
-	test	%RDX_LP, %RDX_LP
-	jz	L(null)
-# endif
-# ifdef USE_AS_WMEMCHR
-	shl	$2, %RDX_LP
-# else
 #  ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%edx, %edx
+	/* Clear upper bits.  */
+	and	%RDX_LP, %RDX_LP
+#  else
+	test	%RDX_LP, %RDX_LP
 #  endif
+	jz	L(null)
 # endif
 	/* Broadcast CHAR to YMMMATCH.  */
 	vmovd	%esi, %xmm0
@@ -84,7 +82,7 @@ ENTRY (MEMCHR)
 	vpmovmskb %ymm1, %eax
 # ifndef USE_AS_RAWMEMCHR
 	/* If length < CHAR_PER_VEC handle special.  */
-	cmpq	$VEC_SIZE, %rdx
+	cmpq	$CHAR_PER_VEC, %rdx
 	jbe	L(first_vec_x0)
 # endif
 	testl	%eax, %eax
@@ -98,6 +96,10 @@ ENTRY (MEMCHR)
 L(first_vec_x0):
 	/* Check if first match was before length.  */
 	tzcntl	%eax, %eax
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %edx
+#  endif
 	xorl	%ecx, %ecx
 	cmpl	%eax, %edx
 	leaq	(%rdi, %rax), %rax
@@ -110,12 +112,12 @@ L(null):
 # endif
 	.p2align 4
 L(cross_page_boundary):
-	/* Save pointer before aligning as its original value is necessary
-	   for computer return address if byte is found or adjusting length
-	   if it is not and this is memchr.  */
+	/* Save pointer before aligning as its original value is
+	   necessary for computer return address if byte is found or
+	   adjusting length if it is not and this is memchr.  */
 	movq	%rdi, %rcx
-	/* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr and
-	   rdi for rawmemchr.  */
+	/* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+	   and rdi for rawmemchr.  */
 	orq	$(VEC_SIZE - 1), %ALGN_PTR_REG
 	VPCMPEQ	-(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
 	vpmovmskb %ymm1, %eax
@@ -124,6 +126,10 @@ L(cross_page_boundary):
 	   match).  */
 	leaq	1(%ALGN_PTR_REG), %rsi
 	subq	%RRAW_PTR_REG, %rsi
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
+	shrl	$2, %esi
+#  endif
 # endif
 	/* Remove the leading bytes.  */
 	sarxl	%ERAW_PTR_REG, %eax, %eax
@@ -181,6 +187,10 @@ L(cross_page_continue):
 	orq	$(VEC_SIZE - 1), %rdi
 	/* esi is for adjusting length to see if near the end.  */
 	leal	(VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %esi
+#  endif
 # else
 	orq	$(VEC_SIZE - 1), %rdi
 L(cross_page_continue):
@@ -213,7 +223,7 @@ L(cross_page_continue):
 
 # ifndef USE_AS_RAWMEMCHR
 	/* Check if at last VEC_SIZE * 4 length.  */
-	subq	$(VEC_SIZE * 4), %rdx
+	subq	$(CHAR_PER_VEC * 4), %rdx
 	jbe	L(last_4x_vec_or_less_cmpeq)
 	/* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
 	   length.  */
@@ -221,6 +231,10 @@ L(cross_page_continue):
 	movl	%edi, %ecx
 	orq	$(VEC_SIZE * 4 - 1), %rdi
 	andl	$(VEC_SIZE * 4 - 1), %ecx
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %ecx
+#  endif
 	addq	%rcx, %rdx
 # else
 	/* Align data to VEC_SIZE * 4 - 1 for loop.  */
@@ -250,15 +264,19 @@ L(loop_4x_vec):
 
 	subq	$-(VEC_SIZE * 4), %rdi
 
-	subq	$(VEC_SIZE * 4), %rdx
+	subq	$(CHAR_PER_VEC * 4), %rdx
 	ja	L(loop_4x_vec)
 
-	/* Fall through into less than 4 remaining vectors of length case.
-	 */
+	/* Fall through into less than 4 remaining vectors of length
+	   case.  */
 	VPCMPEQ	(VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
 	vpmovmskb %ymm1, %eax
 	.p2align 4
 L(last_4x_vec_or_less):
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %edx
+#  endif
 	/* Check if first VEC contained match.  */
 	testl	%eax, %eax
 	jnz	L(first_vec_x1_check)
@@ -355,6 +373,10 @@ L(last_vec_x2_return):
 L(last_4x_vec_or_less_cmpeq):
 	VPCMPEQ	(VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
 	vpmovmskb %ymm1, %eax
+#  ifdef USE_AS_WMEMCHR
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %edx
+#  endif
 	subq	$-(VEC_SIZE * 4), %rdi
 	/* Check first VEC regardless.  */
 	testl	%eax, %eax
-- 
GitLab