From 78ff769ceac455cb6749f64effe77d178216f0b0 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 23 Jun 2021 01:56:29 -0400
Subject: [PATCH] x86: Fix overflow bug in wcsnlen-sse4_1 and wcsnlen-avx2 [BZ
 #27974]

This commit fixes the bug mentioned in the previous commit.

The previous implementations of wmemchr in these files relied
on maxlen * sizeof(wchar_t) which was not guaranteed by the standard.

The new overflow tests added in the previous commit now
pass (as well as all the other tests).

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit a775a7a3eb1e85b54af0b4ee5ff4dcf66772a1fb)
---
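Note (kept below the "---" cut line, so git am ignores it): the bug is
that wcsnlen's maxlen is a count of wchar_t elements, and converting it
to a byte count with a shift by 2 can wrap around a 64-bit size_t. A
minimal C sketch of the two guards the strlen-vec.S hunk adds, assuming
a 64-bit size_t; the helper names are illustrative, not from the patch:

	#include <stddef.h>
	#include <stdint.h>

	/* Mirrors the `sar $62' test: if either of the top two bits of
	   maxlen is set, maxlen * sizeof (wchar_t) cannot be represented
	   in 64 bits, so the code falls back to the unbounded
	   __wcslen_sse4_1.  */
	static int
	byte_count_overflows (size_t maxlen)
	{
	  return (maxlen >> 62) != 0;
	}

	/* Mirrors the `jbe' taken after `add %RDI_LP, %RSI_LP': even a
	   representable byte count can wrap once the base pointer is
	   added.  maxlen == 0 is already handled earlier in the asm.  */
	static int
	end_pointer_wraps (const void *s, size_t maxlen)
	{
	  uintptr_t end = (uintptr_t) s + maxlen * sizeof (wchar_t);
	  return end <= (uintptr_t) s;
	}

The wcsnlen-avx2 side avoids the multiply altogether: maxlen stays in
wchar_t units (compared against CHAR_PER_VEC) and is only scaled by 4,
in either direction, at points where a wrap can no longer occur.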
 sysdeps/x86_64/multiarch/strlen-avx2.S | 130 ++++++++++++++++++-------
 sysdeps/x86_64/multiarch/strlen-vec.S  |  15 ++-
 2 files changed, 107 insertions(+), 38 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
index be8a5db5..37688966 100644
--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -44,21 +44,21 @@
 
 # define VEC_SIZE 32
 # define PAGE_SIZE 4096
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
 
 	.section SECTION(.text),"ax",@progbits
 ENTRY (STRLEN)
 # ifdef USE_AS_STRNLEN
 	/* Check zero length.  */
+#  ifdef __ILP32__
+	/* Clear upper bits.  */
+	and	%RSI_LP, %RSI_LP
+#  else
 	test	%RSI_LP, %RSI_LP
+#  endif
 	jz	L(zero)
 	/* Store max len in R8_LP before adjusting if using WCSLEN.  */
 	mov	%RSI_LP, %R8_LP
-#  ifdef USE_AS_WCSLEN
-	shl	$2, %RSI_LP
-#  elif defined __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%esi, %esi
-#  endif
 # endif
 	movl	%edi, %eax
 	movq	%rdi, %rdx
@@ -72,10 +72,10 @@ ENTRY (STRLEN)
 
 	/* Check the first VEC_SIZE bytes.  */
 	VPCMPEQ	(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 # ifdef USE_AS_STRNLEN
 	/* If length < VEC_SIZE handle special.  */
-	cmpq	$VEC_SIZE, %rsi
+	cmpq	$CHAR_PER_VEC, %rsi
 	jbe	L(first_vec_x0)
 # endif
 	/* If empty continue to aligned_more. Otherwise return bit
@@ -84,6 +84,7 @@ ENTRY (STRLEN)
 	jz	L(aligned_more)
 	tzcntl	%eax, %eax
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 # endif
 	VZEROUPPER_RETURN
@@ -97,9 +98,14 @@ L(zero):
 L(first_vec_x0):
 	/* Set bit for max len so that tzcnt will return min of max len
 	   and position of first match.  */
+#  ifdef USE_AS_WCSLEN
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %esi
+#  endif
 	btsq	%rsi, %rax
 	tzcntl	%eax, %eax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 #  endif
 	VZEROUPPER_RETURN
@@ -113,14 +119,19 @@ L(first_vec_x1):
 # ifdef USE_AS_STRNLEN
 	/* Use ecx which was computed earlier to compute correct value.
 	 */
+#  ifdef USE_AS_WCSLEN
+	leal	-(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax
+#  else
 	subl	$(VEC_SIZE * 4 + 1), %ecx
 	addl	%ecx, %eax
+#  endif
 # else
 	subl	%edx, %edi
 	incl	%edi
 	addl	%edi, %eax
 # endif
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 # endif
 	VZEROUPPER_RETURN
@@ -133,14 +144,19 @@ L(first_vec_x2):
 # ifdef USE_AS_STRNLEN
 	/* Use ecx which was computed earlier to compute correct value.
 	 */
+#  ifdef USE_AS_WCSLEN
+	leal	-(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax
+#  else
 	subl	$(VEC_SIZE * 3 + 1), %ecx
 	addl	%ecx, %eax
+#  endif
 # else
 	subl	%edx, %edi
 	addl	$(VEC_SIZE + 1), %edi
 	addl	%edi, %eax
 # endif
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 # endif
 	VZEROUPPER_RETURN
@@ -153,14 +169,19 @@ L(first_vec_x3):
 # ifdef USE_AS_STRNLEN
 	/* Use ecx which was computed earlier to compute correct value.
 	 */
+#  ifdef USE_AS_WCSLEN
+	leal	-(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax
+#  else
 	subl	$(VEC_SIZE * 2 + 1), %ecx
 	addl	%ecx, %eax
+#  endif
 # else
 	subl	%edx, %edi
 	addl	$(VEC_SIZE * 2 + 1), %edi
 	addl	%edi, %eax
 # endif
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 # endif
 	VZEROUPPER_RETURN
@@ -173,14 +194,19 @@ L(first_vec_x4):
 # ifdef USE_AS_STRNLEN
 	/* Use ecx which was computed earlier to compute correct value.
 	 */
+#  ifdef USE_AS_WCSLEN
+	leal	-(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax
+#  else
 	subl	$(VEC_SIZE + 1), %ecx
 	addl	%ecx, %eax
+#  endif
 # else
 	subl	%edx, %edi
 	addl	$(VEC_SIZE * 3 + 1), %edi
 	addl	%edi, %eax
 # endif
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 # endif
 	VZEROUPPER_RETURN
@@ -195,10 +221,14 @@ L(cross_page_continue):
 	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
 	   since data is only aligned to VEC_SIZE.  */
 # ifdef USE_AS_STRNLEN
-	/* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE because
-	   it simplies the logic in last_4x_vec_or_less.  */
+	/* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE
+	   because it simplies the logic in last_4x_vec_or_less.  */
 	leaq	(VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx
 	subq	%rdx, %rcx
+#  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %ecx
+#  endif
 # endif
 	/* Load first VEC regardless.  */
 	VPCMPEQ	1(%rdi), %ymm0, %ymm1
@@ -207,34 +237,38 @@ L(cross_page_continue):
 	subq	%rcx, %rsi
 	jb	L(last_4x_vec_or_less)
 # endif
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(first_vec_x1)
 
 	VPCMPEQ	(VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(first_vec_x2)
 
 	VPCMPEQ	(VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(first_vec_x3)
 
 	VPCMPEQ	(VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(first_vec_x4)
 
 	/* Align data to VEC_SIZE * 4 - 1.  */
 # ifdef USE_AS_STRNLEN
 	/* Before adjusting length check if at last VEC_SIZE * 4.  */
-	cmpq	$(VEC_SIZE * 4 - 1), %rsi
+	cmpq	$(CHAR_PER_VEC * 4 - 1), %rsi
 	jbe	L(last_4x_vec_or_less_load)
 	incq	%rdi
 	movl	%edi, %ecx
 	orq	$(VEC_SIZE * 4 - 1), %rdi
 	andl	$(VEC_SIZE * 4 - 1), %ecx
+#  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %ecx
+#  endif
 	/* Readjust length.  */
 	addq	%rcx, %rsi
 # else
@@ -246,13 +280,13 @@ L(cross_page_continue):
 L(loop_4x_vec):
 # ifdef USE_AS_STRNLEN
 	/* Break if at end of length.  */
-	subq	$(VEC_SIZE * 4), %rsi
+	subq	$(CHAR_PER_VEC * 4), %rsi
 	jb	L(last_4x_vec_or_less_cmpeq)
 # endif
-	/* Save some code size by microfusing VPMINU with the load. Since
-	   the matches in ymm2/ymm4 can only be returned if there where no
-	   matches in ymm1/ymm3 respectively there is no issue with overlap.
-	 */
+	/* Save some code size by microfusing VPMINU with the load.
+	   Since the matches in ymm2/ymm4 can only be returned if there
+	   where no matches in ymm1/ymm3 respectively there is no issue
+	   with overlap.  */
 	vmovdqa	1(%rdi), %ymm1
 	VPMINU	(VEC_SIZE + 1)(%rdi), %ymm1, %ymm2
 	vmovdqa	(VEC_SIZE * 2 + 1)(%rdi), %ymm3
@@ -260,7 +294,7 @@ L(loop_4x_vec):
 
 	VPMINU	%ymm2, %ymm4, %ymm5
 	VPCMPEQ	%ymm5, %ymm0, %ymm5
-	vpmovmskb	%ymm5, %ecx
+	vpmovmskb %ymm5, %ecx
 
 	subq	$-(VEC_SIZE * 4), %rdi
 	testl	%ecx, %ecx
@@ -268,27 +302,28 @@ L(loop_4x_vec):
 
 
 	VPCMPEQ	%ymm1, %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	subq	%rdx, %rdi
 	testl	%eax, %eax
 	jnz	L(last_vec_return_x0)
 
 	VPCMPEQ	%ymm2, %ymm0, %ymm2
-	vpmovmskb	%ymm2, %eax
+	vpmovmskb %ymm2, %eax
 	testl	%eax, %eax
 	jnz	L(last_vec_return_x1)
 
 	/* Combine last 2 VEC.  */
 	VPCMPEQ	%ymm3, %ymm0, %ymm3
-	vpmovmskb	%ymm3, %eax
-	/* rcx has combined result from all 4 VEC. It will only be used if
-	   the first 3 other VEC all did not contain a match.  */
+	vpmovmskb %ymm3, %eax
+	/* rcx has combined result from all 4 VEC. It will only be used
+	   if the first 3 other VEC all did not contain a match.  */
 	salq	$32, %rcx
 	orq	%rcx, %rax
 	tzcntq	%rax, %rax
 	subq	$(VEC_SIZE * 2 - 1), %rdi
 	addq	%rdi, %rax
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 # endif
 	VZEROUPPER_RETURN
@@ -297,15 +332,19 @@ L(loop_4x_vec):
 # ifdef USE_AS_STRNLEN
 	.p2align 4
 L(last_4x_vec_or_less_load):
-	/* Depending on entry adjust rdi / prepare first VEC in ymm1.  */
+	/* Depending on entry adjust rdi / prepare first VEC in ymm1.
+	 */
 	subq	$-(VEC_SIZE * 4), %rdi
 L(last_4x_vec_or_less_cmpeq):
 	VPCMPEQ	1(%rdi), %ymm0, %ymm1
 L(last_4x_vec_or_less):
-
-	vpmovmskb	%ymm1, %eax
-	/* If remaining length > VEC_SIZE * 2. This works if esi is off by
-	   VEC_SIZE * 4.  */
+#  ifdef USE_AS_WCSLEN
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %esi
+#  endif
+	vpmovmskb %ymm1, %eax
+	/* If remaining length > VEC_SIZE * 2. This works if esi is off
+	   by VEC_SIZE * 4.  */
 	testl	$(VEC_SIZE * 2), %esi
 	jnz	L(last_4x_vec)
 
@@ -320,7 +359,7 @@ L(last_4x_vec_or_less):
 	jb	L(max)
 
 	VPCMPEQ	(VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	tzcntl	%eax, %eax
 	/* Check the end of data.  */
 	cmpl	%eax, %esi
@@ -329,6 +368,7 @@ L(last_4x_vec_or_less):
 	addl	$(VEC_SIZE + 1), %eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -340,6 +380,7 @@ L(last_vec_return_x0):
 	subq	$(VEC_SIZE * 4 - 1), %rdi
 	addq	%rdi, %rax
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 # endif
 	VZEROUPPER_RETURN
@@ -350,6 +391,7 @@ L(last_vec_return_x1):
 	subq	$(VEC_SIZE * 3 - 1), %rdi
 	addq	%rdi, %rax
 # ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 # endif
 	VZEROUPPER_RETURN
@@ -366,6 +408,7 @@ L(last_vec_x1_check):
 	incl	%eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -381,14 +424,14 @@ L(last_4x_vec):
 	jnz	L(last_vec_x1)
 
 	VPCMPEQ	(VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(last_vec_x2)
 
 	/* Normalize length.  */
 	andl	$(VEC_SIZE * 4 - 1), %esi
 	VPCMPEQ	(VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	testl	%eax, %eax
 	jnz	L(last_vec_x3)
 
@@ -396,7 +439,7 @@ L(last_4x_vec):
 	jb	L(max)
 
 	VPCMPEQ	(VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	tzcntl	%eax, %eax
 	/* Check the end of data.  */
 	cmpl	%eax, %esi
@@ -405,6 +448,7 @@ L(last_4x_vec):
 	addl	$(VEC_SIZE * 3 + 1), %eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -419,6 +463,7 @@ L(last_vec_x1):
 	incl	%eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -432,6 +477,7 @@ L(last_vec_x2):
 	addl	$(VEC_SIZE + 1), %eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -447,6 +493,7 @@ L(last_vec_x3):
 	addl	$(VEC_SIZE * 2 + 1), %eax
 	addq	%rdi, %rax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
 	shrq	$2, %rax
 #  endif
 	VZEROUPPER_RETURN
@@ -455,13 +502,13 @@ L(max_end):
 	VZEROUPPER_RETURN
 # endif
 
-	/* Cold case for crossing page with first load.	 */
+	/* Cold case for crossing page with first load.  */
 	.p2align 4
 L(cross_page_boundary):
 	/* Align data to VEC_SIZE - 1.  */
 	orq	$(VEC_SIZE - 1), %rdi
 	VPCMPEQ	-(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1
-	vpmovmskb	%ymm1, %eax
+	vpmovmskb %ymm1, %eax
 	/* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT
 	   so no need to manually mod rdx.  */
 	sarxl	%edx, %eax, %eax
@@ -470,6 +517,10 @@ L(cross_page_boundary):
 	jnz	L(cross_page_less_vec)
 	leaq	1(%rdi), %rcx
 	subq	%rdx, %rcx
+#  ifdef USE_AS_WCSLEN
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
+	shrl	$2, %ecx
+#  endif
 	/* Check length.  */
 	cmpq	%rsi, %rcx
 	jb	L(cross_page_continue)
@@ -479,6 +530,7 @@ L(cross_page_boundary):
 	jz	L(cross_page_continue)
 	tzcntl	%eax, %eax
 #  ifdef USE_AS_WCSLEN
+	/* NB: Divide length by 4 to get wchar_t count.  */
 	shrl	$2, %eax
 #  endif
 # endif
@@ -489,6 +541,10 @@ L(return_vzeroupper):
 	.p2align 4
 L(cross_page_less_vec):
 	tzcntl	%eax, %eax
+#  ifdef USE_AS_WCSLEN
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %esi
+#  endif
 	cmpq	%rax, %rsi
 	cmovb	%esi, %eax
 #  ifdef USE_AS_WCSLEN
diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
index 8f660bb9..439e486a 100644
--- a/sysdeps/x86_64/multiarch/strlen-vec.S
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
@@ -65,12 +65,25 @@ ENTRY(strlen)
 	ret
 L(n_nonzero):
 # ifdef AS_WCSLEN
-	shl	$2, %RSI_LP
+/* Check for overflow from maxlen * sizeof(wchar_t). If it would
+   overflow the only way this program doesn't have undefined behavior 
+   is if there is a null terminator in valid memory so wcslen will 
+   suffice.  */
+	mov	%RSI_LP, %R10_LP
+	sar	$62, %R10_LP
+	test	%R10_LP, %R10_LP
+	jnz	__wcslen_sse4_1
+	sal	$2, %RSI_LP
 # endif
 
+
 /* Initialize long lived registers.  */
 
 	add	%RDI_LP, %RSI_LP
+# ifdef AS_WCSLEN
+/* Check for overflow again from s + maxlen * sizeof(wchar_t).  */
+	jbe	__wcslen_sse4_1
+# endif
 	mov	%RSI_LP, %R10_LP
 	and	$-64, %R10_LP
 	mov	%RSI_LP, %R11_LP
-- 
GitLab