From ca51bafc1a88d8b8348f5fd97adc5d6ca93f8e76 Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Fri, 24 Nov 2017 11:35:50 +0100
Subject: [PATCH] bn/asm/rsaz-avx2.pl: fix digit correction bug in
 rsaz_1024_mul_avx2.

Credit to OSS-Fuzz for finding this.

CVE-2017-3738

Reviewed-by: Rich Salz <rsalz@openssl.org>
---
 crypto/bn/asm/rsaz-avx2.pl | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl
index 712a77f..2b3f8b0 100755
--- a/crypto/bn/asm/rsaz-avx2.pl
+++ b/crypto/bn/asm/rsaz-avx2.pl
@@ -239,7 +239,7 @@ $code.=<<___;
 	vmovdqu		32*8-128($ap), $ACC8
 
 	lea	192(%rsp), $tp0			# 64+128=192
-	vpbroadcastq	.Land_mask(%rip), $AND_MASK
+	vmovdqu	.Land_mask(%rip), $AND_MASK
 	jmp	.LOOP_GRANDE_SQR_1024
 
 .align	32
@@ -1070,10 +1070,10 @@ $code.=<<___;
 	vpmuludq	32*6-128($np),$Yi,$TEMP1
 	vpaddq		$TEMP1,$ACC6,$ACC6
 	vpmuludq	32*7-128($np),$Yi,$TEMP2
-	 vpblendd	\$3, $ZERO, $ACC9, $ACC9	# correct $ACC3
+	 vpblendd	\$3, $ZERO, $ACC9, $TEMP1	# correct $ACC3
 	vpaddq		$TEMP2,$ACC7,$ACC7
 	vpmuludq	32*8-128($np),$Yi,$TEMP0
-	 vpaddq		$ACC9, $ACC3, $ACC3		# correct $ACC3
+	 vpaddq		$TEMP1, $ACC3, $ACC3		# correct $ACC3
 	vpaddq		$TEMP0,$ACC8,$ACC8
 
 	mov	%rbx, %rax
@@ -1086,7 +1086,9 @@ $code.=<<___;
 	 vmovdqu	-8+32*2-128($ap),$TEMP2
 
 	mov	$r1, %rax
+	 vpblendd	\$0xfc, $ZERO, $ACC9, $ACC9	# correct $ACC3
 	imull	$n0, %eax
+	 vpaddq		$ACC9,$ACC4,$ACC4		# correct $ACC3
 	and	\$0x1fffffff, %eax
 
 	 imulq	16-128($ap),%rbx
@@ -1322,15 +1324,12 @@ ___
 #	But as we underutilize resources, it's possible to correct in
 #	each iteration with marginal performance loss. But then, as
 #	we do it in each iteration, we can correct less digits, and
-#	avoid performance penalties completely. Also note that we
-#	correct only three digits out of four. This works because
-#	most significant digit is subjected to less additions.
+#	avoid performance penalties completely.
 
 $TEMP0 = $ACC9;
 $TEMP3 = $Bi;
 $TEMP4 = $Yi;
 $code.=<<___;
-	vpermq		\$0, $AND_MASK, $AND_MASK
 	vpaddq		(%rsp), $TEMP1, $ACC0
 
 	vpsrlq		\$29, $ACC0, $TEMP1
@@ -1763,7 +1762,7 @@ $code.=<<___;
 
 .align	64
 .Land_mask:
-	.quad	0x1fffffff,0x1fffffff,0x1fffffff,-1
+	.quad	0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
 .Lscatter_permd:
 	.long	0,2,4,6,7,7,7,7
 .Lgather_permd:
-- 
2.9.5
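
A note for context, not part of the patch: rsaz_1024_mul_avx2 keeps each 1024-bit operand in a redundant form of 29-bit digits, one digit per 64-bit lane, and periodically reduces every digit by masking it with 0x1fffffff and carrying the excess into the next digit. The old .Land_mask had -1 in its fourth quad, so the fourth digit of each group was never masked and could overflow for certain inputs; the fix loads 0x1fffffff into all four lanes (hence vpbroadcastq -> vmovdqu) and adds the extra $ACC3/$ACC4 corrections in the multiplication loop. The scalar C sketch below (illustrative names, not OpenSSL code) shows the per-digit reduction that the vector code performs four lanes at a time.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#define DIGIT_BITS 29
#define DIGIT_MASK 0x1fffffffULL   /* must be applied to every digit */

/* Reduce each 64-bit limb back to a 29-bit digit, carrying the excess
 * into the next limb.  Skipping the mask on any one limb (the effect of
 * the old -1 lane in .Land_mask) leaves that digit unreduced. */
static void normalize(uint64_t *d, size_t n)
{
    uint64_t carry = 0;
    for (size_t i = 0; i < n; i++) {
        uint64_t t = d[i] + carry;
        d[i]  = t & DIGIT_MASK;    /* keep the low 29 bits          */
        carry = t >> DIGIT_BITS;   /* excess goes into the next digit */
    }
    /* a final carry out of the top limb would be folded back in by the caller */
}

int main(void)
{
    /* four digits with pending carries, as one AVX2 register group holds */
    uint64_t d[4] = { 0x3ffffffffULL, 0x25, 0x1fffffffULL, 0x7 };
    normalize(d, 4);
    for (int i = 0; i < 4; i++)
        printf("digit %d = %#llx\n", i, (unsigned long long)d[i]);
    return 0;
}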