|
|
a14c93 |
From ca51bafc1a88d8b8348f5fd97adc5d6ca93f8e76 Mon Sep 17 00:00:00 2001
|
|
|
a14c93 |
From: Andy Polyakov <appro@openssl.org>
|
|
|
a14c93 |
Date: Fri, 24 Nov 2017 11:35:50 +0100
|
|
|
a14c93 |
Subject: [PATCH] bn/asm/rsaz-avx2.pl: fix digit correction bug in
|
|
|
a14c93 |
rsaz_1024_mul_avx2.
|
|
|
a14c93 |
|
|
|
a14c93 |
Credit to OSS-Fuzz for finding this.
|
|
|
a14c93 |
|
|
|
a14c93 |
CVE-2017-3738
|
|
|
a14c93 |
|
|
|
a14c93 |
Reviewed-by: Rich Salz <rsalz@openssl.org>
|
|
|
a14c93 |
---
|
|
|
a14c93 |
crypto/bn/asm/rsaz-avx2.pl | 15 +++++++--------
|
|
|
a14c93 |
1 file changed, 7 insertions(+), 8 deletions(-)
|
|
|
a14c93 |
|
|
|
a14c93 |
diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl
|
|
|
a14c93 |
index 712a77f..2b3f8b0 100755
|
|
|
a14c93 |
--- a/crypto/bn/asm/rsaz-avx2.pl
|
|
|
a14c93 |
+++ b/crypto/bn/asm/rsaz-avx2.pl
|
|
|
a14c93 |
@@ -239,7 +239,7 @@ $code.=<<___;
|
|
|
a14c93 |
vmovdqu 32*8-128($ap), $ACC8
|
|
|
a14c93 |
|
|
|
a14c93 |
lea 192(%rsp), $tp0 # 64+128=192
|
|
|
a14c93 |
- vpbroadcastq .Land_mask(%rip), $AND_MASK
|
|
|
a14c93 |
+ vmovdqu .Land_mask(%rip), $AND_MASK
|
|
|
a14c93 |
jmp .LOOP_GRANDE_SQR_1024
|
|
|
a14c93 |
|
|
|
a14c93 |
.align 32
|
|
|
a14c93 |
@@ -1070,10 +1070,10 @@ $code.=<<___;
|
|
|
a14c93 |
vpmuludq 32*6-128($np),$Yi,$TEMP1
|
|
|
a14c93 |
vpaddq $TEMP1,$ACC6,$ACC6
|
|
|
a14c93 |
vpmuludq 32*7-128($np),$Yi,$TEMP2
|
|
|
a14c93 |
- vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3
|
|
|
a14c93 |
+ vpblendd \$3, $ZERO, $ACC9, $TEMP1 # correct $ACC3
|
|
|
a14c93 |
vpaddq $TEMP2,$ACC7,$ACC7
|
|
|
a14c93 |
vpmuludq 32*8-128($np),$Yi,$TEMP0
|
|
|
a14c93 |
- vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3
|
|
|
a14c93 |
+ vpaddq $TEMP1, $ACC3, $ACC3 # correct $ACC3
|
|
|
a14c93 |
vpaddq $TEMP0,$ACC8,$ACC8
|
|
|
a14c93 |
|
|
|
a14c93 |
mov %rbx, %rax
|
|
|
a14c93 |
@@ -1086,7 +1086,9 @@ $code.=<<___;
|
|
|
a14c93 |
vmovdqu -8+32*2-128($ap),$TEMP2
|
|
|
a14c93 |
|
|
|
a14c93 |
mov $r1, %rax
|
|
|
a14c93 |
+ vpblendd \$0xfc, $ZERO, $ACC9, $ACC9 # correct $ACC3
|
|
|
a14c93 |
imull $n0, %eax
|
|
|
a14c93 |
+ vpaddq $ACC9,$ACC4,$ACC4 # correct $ACC3
|
|
|
a14c93 |
and \$0x1fffffff, %eax
|
|
|
a14c93 |
|
|
|
a14c93 |
imulq 16-128($ap),%rbx
|
|
|
a14c93 |
@@ -1322,15 +1324,12 @@ ___
|
|
|
a14c93 |
# But as we underutilize resources, it's possible to correct in
|
|
|
a14c93 |
# each iteration with marginal performance loss. But then, as
|
|
|
a14c93 |
# we do it in each iteration, we can correct less digits, and
|
|
|
a14c93 |
-# avoid performance penalties completely. Also note that we
|
|
|
a14c93 |
-# correct only three digits out of four. This works because
|
|
|
a14c93 |
-# most significant digit is subjected to less additions.
|
|
|
a14c93 |
+# avoid performance penalties completely.
|
|
|
a14c93 |
|
|
|
a14c93 |
$TEMP0 = $ACC9;
|
|
|
a14c93 |
$TEMP3 = $Bi;
|
|
|
a14c93 |
$TEMP4 = $Yi;
|
|
|
a14c93 |
$code.=<<___;
|
|
|
a14c93 |
- vpermq \$0, $AND_MASK, $AND_MASK
|
|
|
a14c93 |
vpaddq (%rsp), $TEMP1, $ACC0
|
|
|
a14c93 |
|
|
|
a14c93 |
vpsrlq \$29, $ACC0, $TEMP1
|
|
|
a14c93 |
@@ -1763,7 +1762,7 @@ $code.=<<___;
|
|
|
a14c93 |
|
|
|
a14c93 |
.align 64
|
|
|
a14c93 |
.Land_mask:
|
|
|
a14c93 |
- .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
|
|
|
a14c93 |
+ .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
|
|
|
a14c93 |
.Lscatter_permd:
|
|
|
a14c93 |
.long 0,2,4,6,7,7,7,7
|
|
|
a14c93 |
.Lgather_permd:
|
|
|
a14c93 |
--
|
|
|
a14c93 |
2.9.5
|
|
|
a14c93 |
|