diff --git a/SOURCES/openssl-1.0.1e-cve-2014-3570.patch b/SOURCES/openssl-1.0.1e-cve-2014-3570.patch new file mode 100644 index 0000000..aff2f02 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2014-3570.patch @@ -0,0 +1,3155 @@ +From e078642ddea29bbb6ba29788a6a513796387fbbb Mon Sep 17 00:00:00 2001 +From: Andy Polyakov +Date: Mon, 5 Jan 2015 14:52:56 +0100 +Subject: [PATCH] Fix for CVE-2014-3570. + +Reviewed-by: Emilia Kasper +(cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de) +(with 1.0.1-specific addendum) +--- + crypto/bn/asm/mips.pl | 611 +++--------- + crypto/bn/asm/mips3.s | 2201 -------------------------------------------- + crypto/bn/asm/x86_64-gcc.c | 34 +- + crypto/bn/bn_asm.c | 16 +- + crypto/bn/bntest.c | 102 +- + 5 files changed, 234 insertions(+), 2730 deletions(-) + delete mode 100644 crypto/bn/asm/mips3.s + +diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl +index d2f3ef7..215c9a7 100644 +--- a/crypto/bn/asm/mips.pl ++++ b/crypto/bn/asm/mips.pl +@@ -1872,6 +1872,41 @@ ___ + + ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); + ++sub add_c2 () { ++my ($hi,$lo,$c0,$c1,$c2, ++ $warm, # !$warm denotes first call with specific sequence of ++ # $c_[XYZ] when there is no Z-carry to accumulate yet; ++ $an,$bn # these two are arguments for multiplication which ++ # result is used in *next* step [which is why it's ++ # commented as "forward multiplication" below]; ++ )=@_; ++$code.=<<___; ++ mflo $lo ++ mfhi $hi ++ $ADDU $c0,$lo ++ sltu $at,$c0,$lo ++ $MULTU $an,$bn # forward multiplication ++ $ADDU $c0,$lo ++ $ADDU $at,$hi ++ sltu $lo,$c0,$lo ++ $ADDU $c1,$at ++ $ADDU $hi,$lo ++___ ++$code.=<<___ if (!$warm); ++ sltu $c2,$c1,$at ++ $ADDU $c1,$hi ++ sltu $hi,$c1,$hi ++ $ADDU $c2,$hi ++___ ++$code.=<<___ if ($warm); ++ sltu $at,$c1,$at ++ $ADDU $c1,$hi ++ $ADDU $c2,$at ++ sltu $hi,$c1,$hi ++ $ADDU $c2,$hi ++___ ++} ++ + $code.=<<___; + + .align 5 +@@ -1920,21 +1955,10 @@ $code.=<<___; + sltu $at,$c_2,$t_1 + $ADDU $c_3,$t_2,$at + $ST 
$c_2,$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 +@@ -1945,67 +1969,19 @@ $code.=<<___; + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_3,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at ++___ ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, ++ $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); ++$code.=<<___; + $ST $c_1,3*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_1,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_1,$at +- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at ++___ ++ 
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, ++ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, ++ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 +@@ -2016,97 +1992,23 @@ $code.=<<___; + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_2,$at +- $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); +- $ADDU $c_2,$at +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, ++ $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, ++ $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); ++$code.=<<___; + $ST $c_3,5*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_3,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); +- $SLL $t_2,1 +- slt 
$a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at ++___ ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, ++ $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 +@@ -2117,112 +2019,25 @@ $code.=<<___; + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,6*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_1,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_1,$at +- $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_1,$at +- $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_1,$at +- $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL 
$t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at ++___ ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, ++ $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, ++ $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, ++ $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, ++ $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); ++$code.=<<___; + $ST $c_2,7*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_2,$at +- $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_2,$at +- $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, ++ $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, ++ $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 +@@ -2233,82 +2048,21 @@ $code.=<<___; + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,8*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_3,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); +- slt 
$a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at ++___ ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, ++ $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); ++$code.=<<___; + $ST $c_1,9*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_1,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_1,$at +- $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at ++___ ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, ++ $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, ++ $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 +@@ -2319,52 +2073,17 @@ $code.=<<___; + sltu $at,$c_3,$t_2 + $ADDU 
$c_1,$at + $ST $c_2,10*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_2,$at +- $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, ++ $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); ++$code.=<<___; + $ST $c_3,11*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_3,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at ++___ ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, ++ $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_1,$t_1 +@@ -2375,21 +2094,10 @@ $code.=<<___; + sltu $at,$c_2,$t_2 + $ADDU $c_3,$at + $ST $c_1,12*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_1,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at ++___ ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, ++ $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); ++$code.=<<___; + $ST $c_2,13*$BNSZ($a0) + + mflo $t_1 +@@ -2457,21 +2165,10 @@ $code.=<<___; + sltu $at,$c_2,$t_1 + $ADDU $c_3,$t_2,$at + $ST $c_2,$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt 
$c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_3,$t_1 +@@ -2482,52 +2179,17 @@ $code.=<<___; + sltu $at,$c_1,$t_2 + $ADDU $c_2,$at + $ST $c_3,2*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_3,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at +- mflo $t_1 +- mfhi $t_2 +- slt $at,$t_2,$zero +- $ADDU $c_3,$at +- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); +- $SLL $t_2,1 +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_1,$t_1 +- sltu $at,$c_1,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_2,$t_2 +- sltu $at,$c_2,$t_2 +- $ADDU $c_3,$at ++___ ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, ++ $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); ++ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, ++ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); ++$code.=<<___; + $ST $c_1,3*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_1,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); +- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_2,$t_1 +- sltu $at,$c_2,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_3,$t_2 +- sltu $at,$c_3,$t_2 +- $ADDU $c_1,$at ++___ ++ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, ++ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); ++$code.=<<___; + mflo $t_1 + mfhi $t_2 + $ADDU $c_2,$t_1 +@@ -2538,21 +2200,10 @@ $code.=<<___; + sltu $at,$c_3,$t_2 + $ADDU $c_1,$at + $ST $c_2,4*$BNSZ($a0) +- +- mflo $t_1 +- mfhi $t_2 +- slt $c_2,$t_2,$zero +- $SLL $t_2,1 +- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); 
+- slt $a2,$t_1,$zero +- $ADDU $t_2,$a2 +- $SLL $t_1,1 +- $ADDU $c_3,$t_1 +- sltu $at,$c_3,$t_1 +- $ADDU $t_2,$at +- $ADDU $c_1,$t_2 +- sltu $at,$c_1,$t_2 +- $ADDU $c_2,$at ++___ ++ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, ++ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); ++$code.=<<___; + $ST $c_3,5*$BNSZ($a0) + + mflo $t_1 +diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s +deleted file mode 100644 +index dca4105..0000000 +--- a/crypto/bn/asm/mips3.s ++++ /dev/null +@@ -1,2201 +0,0 @@ +-.rdata +-.asciiz "mips3.s, Version 1.1" +-.asciiz "MIPS III/IV ISA artwork by Andy Polyakov " +- +-/* +- * ==================================================================== +- * Written by Andy Polyakov for the OpenSSL +- * project. +- * +- * Rights for redistribution and usage in source and binary forms are +- * granted according to the OpenSSL license. Warranty of any kind is +- * disclaimed. +- * ==================================================================== +- */ +- +-/* +- * This is my modest contributon to the OpenSSL project (see +- * http://www.openssl.org/ for more information about it) and is +- * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c +- * module. For updates see http://fy.chalmers.se/~appro/hpe/. +- * +- * The module is designed to work with either of the "new" MIPS ABI(5), +- * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under +- * IRIX 5.x not only because it doesn't support new ABIs but also +- * because 5.x kernels put R4x00 CPU into 32-bit mode and all those +- * 64-bit instructions (daddu, dmultu, etc.) found below gonna only +- * cause illegal instruction exception:-( +- * +- * In addition the code depends on preprocessor flags set up by MIPSpro +- * compiler driver (either as or cc) and therefore (probably?) can't be +- * compiled by the GNU assembler. GNU C driver manages fine though... 
+- * I mean as long as -mmips-as is specified or is the default option, +- * because then it simply invokes /usr/bin/as which in turn takes +- * perfect care of the preprocessor definitions. Another neat feature +- * offered by the MIPSpro assembler is an optimization pass. This gave +- * me the opportunity to have the code looking more regular as all those +- * architecture dependent instruction rescheduling details were left to +- * the assembler. Cool, huh? +- * +- * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' +- * goes way over 3 times faster! +- * +- * +- */ +-#include +-#include +- +-#if _MIPS_ISA>=4 +-#define MOVNZ(cond,dst,src) \ +- movn dst,src,cond +-#else +-#define MOVNZ(cond,dst,src) \ +- .set noreorder; \ +- bnezl cond,.+8; \ +- move dst,src; \ +- .set reorder +-#endif +- +-.text +- +-.set noat +-.set reorder +- +-#define MINUS4 v1 +- +-.align 5 +-LEAF(bn_mul_add_words) +- .set noreorder +- bgtzl a2,.L_bn_mul_add_words_proceed +- ld t0,0(a1) +- jr ra +- move v0,zero +- .set reorder +- +-.L_bn_mul_add_words_proceed: +- li MINUS4,-4 +- and ta0,a2,MINUS4 +- move v0,zero +- beqz ta0,.L_bn_mul_add_words_tail +- +-.L_bn_mul_add_words_loop: +- dmultu t0,a3 +- ld t1,0(a0) +- ld t2,8(a1) +- ld t3,8(a0) +- ld ta0,16(a1) +- ld ta1,16(a0) +- daddu t1,v0 +- sltu v0,t1,v0 /* All manuals say it "compares 32-bit +- * values", but it seems to work fine +- * even on 64-bit registers. 
*/ +- mflo AT +- mfhi t0 +- daddu t1,AT +- daddu v0,t0 +- sltu AT,t1,AT +- sd t1,0(a0) +- daddu v0,AT +- +- dmultu t2,a3 +- ld ta2,24(a1) +- ld ta3,24(a0) +- daddu t3,v0 +- sltu v0,t3,v0 +- mflo AT +- mfhi t2 +- daddu t3,AT +- daddu v0,t2 +- sltu AT,t3,AT +- sd t3,8(a0) +- daddu v0,AT +- +- dmultu ta0,a3 +- subu a2,4 +- PTR_ADD a0,32 +- PTR_ADD a1,32 +- daddu ta1,v0 +- sltu v0,ta1,v0 +- mflo AT +- mfhi ta0 +- daddu ta1,AT +- daddu v0,ta0 +- sltu AT,ta1,AT +- sd ta1,-16(a0) +- daddu v0,AT +- +- +- dmultu ta2,a3 +- and ta0,a2,MINUS4 +- daddu ta3,v0 +- sltu v0,ta3,v0 +- mflo AT +- mfhi ta2 +- daddu ta3,AT +- daddu v0,ta2 +- sltu AT,ta3,AT +- sd ta3,-8(a0) +- daddu v0,AT +- .set noreorder +- bgtzl ta0,.L_bn_mul_add_words_loop +- ld t0,0(a1) +- +- bnezl a2,.L_bn_mul_add_words_tail +- ld t0,0(a1) +- .set reorder +- +-.L_bn_mul_add_words_return: +- jr ra +- +-.L_bn_mul_add_words_tail: +- dmultu t0,a3 +- ld t1,0(a0) +- subu a2,1 +- daddu t1,v0 +- sltu v0,t1,v0 +- mflo AT +- mfhi t0 +- daddu t1,AT +- daddu v0,t0 +- sltu AT,t1,AT +- sd t1,0(a0) +- daddu v0,AT +- beqz a2,.L_bn_mul_add_words_return +- +- ld t0,8(a1) +- dmultu t0,a3 +- ld t1,8(a0) +- subu a2,1 +- daddu t1,v0 +- sltu v0,t1,v0 +- mflo AT +- mfhi t0 +- daddu t1,AT +- daddu v0,t0 +- sltu AT,t1,AT +- sd t1,8(a0) +- daddu v0,AT +- beqz a2,.L_bn_mul_add_words_return +- +- ld t0,16(a1) +- dmultu t0,a3 +- ld t1,16(a0) +- daddu t1,v0 +- sltu v0,t1,v0 +- mflo AT +- mfhi t0 +- daddu t1,AT +- daddu v0,t0 +- sltu AT,t1,AT +- sd t1,16(a0) +- daddu v0,AT +- jr ra +-END(bn_mul_add_words) +- +-.align 5 +-LEAF(bn_mul_words) +- .set noreorder +- bgtzl a2,.L_bn_mul_words_proceed +- ld t0,0(a1) +- jr ra +- move v0,zero +- .set reorder +- +-.L_bn_mul_words_proceed: +- li MINUS4,-4 +- and ta0,a2,MINUS4 +- move v0,zero +- beqz ta0,.L_bn_mul_words_tail +- +-.L_bn_mul_words_loop: +- dmultu t0,a3 +- ld t2,8(a1) +- ld ta0,16(a1) +- ld ta2,24(a1) +- mflo AT +- mfhi t0 +- daddu v0,AT +- sltu t1,v0,AT +- sd v0,0(a0) +- daddu v0,t1,t0 +- +- 
dmultu t2,a3 +- subu a2,4 +- PTR_ADD a0,32 +- PTR_ADD a1,32 +- mflo AT +- mfhi t2 +- daddu v0,AT +- sltu t3,v0,AT +- sd v0,-24(a0) +- daddu v0,t3,t2 +- +- dmultu ta0,a3 +- mflo AT +- mfhi ta0 +- daddu v0,AT +- sltu ta1,v0,AT +- sd v0,-16(a0) +- daddu v0,ta1,ta0 +- +- +- dmultu ta2,a3 +- and ta0,a2,MINUS4 +- mflo AT +- mfhi ta2 +- daddu v0,AT +- sltu ta3,v0,AT +- sd v0,-8(a0) +- daddu v0,ta3,ta2 +- .set noreorder +- bgtzl ta0,.L_bn_mul_words_loop +- ld t0,0(a1) +- +- bnezl a2,.L_bn_mul_words_tail +- ld t0,0(a1) +- .set reorder +- +-.L_bn_mul_words_return: +- jr ra +- +-.L_bn_mul_words_tail: +- dmultu t0,a3 +- subu a2,1 +- mflo AT +- mfhi t0 +- daddu v0,AT +- sltu t1,v0,AT +- sd v0,0(a0) +- daddu v0,t1,t0 +- beqz a2,.L_bn_mul_words_return +- +- ld t0,8(a1) +- dmultu t0,a3 +- subu a2,1 +- mflo AT +- mfhi t0 +- daddu v0,AT +- sltu t1,v0,AT +- sd v0,8(a0) +- daddu v0,t1,t0 +- beqz a2,.L_bn_mul_words_return +- +- ld t0,16(a1) +- dmultu t0,a3 +- mflo AT +- mfhi t0 +- daddu v0,AT +- sltu t1,v0,AT +- sd v0,16(a0) +- daddu v0,t1,t0 +- jr ra +-END(bn_mul_words) +- +-.align 5 +-LEAF(bn_sqr_words) +- .set noreorder +- bgtzl a2,.L_bn_sqr_words_proceed +- ld t0,0(a1) +- jr ra +- move v0,zero +- .set reorder +- +-.L_bn_sqr_words_proceed: +- li MINUS4,-4 +- and ta0,a2,MINUS4 +- move v0,zero +- beqz ta0,.L_bn_sqr_words_tail +- +-.L_bn_sqr_words_loop: +- dmultu t0,t0 +- ld t2,8(a1) +- ld ta0,16(a1) +- ld ta2,24(a1) +- mflo t1 +- mfhi t0 +- sd t1,0(a0) +- sd t0,8(a0) +- +- dmultu t2,t2 +- subu a2,4 +- PTR_ADD a0,64 +- PTR_ADD a1,32 +- mflo t3 +- mfhi t2 +- sd t3,-48(a0) +- sd t2,-40(a0) +- +- dmultu ta0,ta0 +- mflo ta1 +- mfhi ta0 +- sd ta1,-32(a0) +- sd ta0,-24(a0) +- +- +- dmultu ta2,ta2 +- and ta0,a2,MINUS4 +- mflo ta3 +- mfhi ta2 +- sd ta3,-16(a0) +- sd ta2,-8(a0) +- +- .set noreorder +- bgtzl ta0,.L_bn_sqr_words_loop +- ld t0,0(a1) +- +- bnezl a2,.L_bn_sqr_words_tail +- ld t0,0(a1) +- .set reorder +- +-.L_bn_sqr_words_return: +- move v0,zero +- jr ra +- +-.L_bn_sqr_words_tail: +- 
dmultu t0,t0 +- subu a2,1 +- mflo t1 +- mfhi t0 +- sd t1,0(a0) +- sd t0,8(a0) +- beqz a2,.L_bn_sqr_words_return +- +- ld t0,8(a1) +- dmultu t0,t0 +- subu a2,1 +- mflo t1 +- mfhi t0 +- sd t1,16(a0) +- sd t0,24(a0) +- beqz a2,.L_bn_sqr_words_return +- +- ld t0,16(a1) +- dmultu t0,t0 +- mflo t1 +- mfhi t0 +- sd t1,32(a0) +- sd t0,40(a0) +- jr ra +-END(bn_sqr_words) +- +-.align 5 +-LEAF(bn_add_words) +- .set noreorder +- bgtzl a3,.L_bn_add_words_proceed +- ld t0,0(a1) +- jr ra +- move v0,zero +- .set reorder +- +-.L_bn_add_words_proceed: +- li MINUS4,-4 +- and AT,a3,MINUS4 +- move v0,zero +- beqz AT,.L_bn_add_words_tail +- +-.L_bn_add_words_loop: +- ld ta0,0(a2) +- subu a3,4 +- ld t1,8(a1) +- and AT,a3,MINUS4 +- ld t2,16(a1) +- PTR_ADD a2,32 +- ld t3,24(a1) +- PTR_ADD a0,32 +- ld ta1,-24(a2) +- PTR_ADD a1,32 +- ld ta2,-16(a2) +- ld ta3,-8(a2) +- daddu ta0,t0 +- sltu t8,ta0,t0 +- daddu t0,ta0,v0 +- sltu v0,t0,ta0 +- sd t0,-32(a0) +- daddu v0,t8 +- +- daddu ta1,t1 +- sltu t9,ta1,t1 +- daddu t1,ta1,v0 +- sltu v0,t1,ta1 +- sd t1,-24(a0) +- daddu v0,t9 +- +- daddu ta2,t2 +- sltu t8,ta2,t2 +- daddu t2,ta2,v0 +- sltu v0,t2,ta2 +- sd t2,-16(a0) +- daddu v0,t8 +- +- daddu ta3,t3 +- sltu t9,ta3,t3 +- daddu t3,ta3,v0 +- sltu v0,t3,ta3 +- sd t3,-8(a0) +- daddu v0,t9 +- +- .set noreorder +- bgtzl AT,.L_bn_add_words_loop +- ld t0,0(a1) +- +- bnezl a3,.L_bn_add_words_tail +- ld t0,0(a1) +- .set reorder +- +-.L_bn_add_words_return: +- jr ra +- +-.L_bn_add_words_tail: +- ld ta0,0(a2) +- daddu ta0,t0 +- subu a3,1 +- sltu t8,ta0,t0 +- daddu t0,ta0,v0 +- sltu v0,t0,ta0 +- sd t0,0(a0) +- daddu v0,t8 +- beqz a3,.L_bn_add_words_return +- +- ld t1,8(a1) +- ld ta1,8(a2) +- daddu ta1,t1 +- subu a3,1 +- sltu t9,ta1,t1 +- daddu t1,ta1,v0 +- sltu v0,t1,ta1 +- sd t1,8(a0) +- daddu v0,t9 +- beqz a3,.L_bn_add_words_return +- +- ld t2,16(a1) +- ld ta2,16(a2) +- daddu ta2,t2 +- sltu t8,ta2,t2 +- daddu t2,ta2,v0 +- sltu v0,t2,ta2 +- sd t2,16(a0) +- daddu v0,t8 +- jr ra +-END(bn_add_words) +- +-.align 5 
+-LEAF(bn_sub_words) +- .set noreorder +- bgtzl a3,.L_bn_sub_words_proceed +- ld t0,0(a1) +- jr ra +- move v0,zero +- .set reorder +- +-.L_bn_sub_words_proceed: +- li MINUS4,-4 +- and AT,a3,MINUS4 +- move v0,zero +- beqz AT,.L_bn_sub_words_tail +- +-.L_bn_sub_words_loop: +- ld ta0,0(a2) +- subu a3,4 +- ld t1,8(a1) +- and AT,a3,MINUS4 +- ld t2,16(a1) +- PTR_ADD a2,32 +- ld t3,24(a1) +- PTR_ADD a0,32 +- ld ta1,-24(a2) +- PTR_ADD a1,32 +- ld ta2,-16(a2) +- ld ta3,-8(a2) +- sltu t8,t0,ta0 +- dsubu t0,ta0 +- dsubu ta0,t0,v0 +- sd ta0,-32(a0) +- MOVNZ (t0,v0,t8) +- +- sltu t9,t1,ta1 +- dsubu t1,ta1 +- dsubu ta1,t1,v0 +- sd ta1,-24(a0) +- MOVNZ (t1,v0,t9) +- +- +- sltu t8,t2,ta2 +- dsubu t2,ta2 +- dsubu ta2,t2,v0 +- sd ta2,-16(a0) +- MOVNZ (t2,v0,t8) +- +- sltu t9,t3,ta3 +- dsubu t3,ta3 +- dsubu ta3,t3,v0 +- sd ta3,-8(a0) +- MOVNZ (t3,v0,t9) +- +- .set noreorder +- bgtzl AT,.L_bn_sub_words_loop +- ld t0,0(a1) +- +- bnezl a3,.L_bn_sub_words_tail +- ld t0,0(a1) +- .set reorder +- +-.L_bn_sub_words_return: +- jr ra +- +-.L_bn_sub_words_tail: +- ld ta0,0(a2) +- subu a3,1 +- sltu t8,t0,ta0 +- dsubu t0,ta0 +- dsubu ta0,t0,v0 +- MOVNZ (t0,v0,t8) +- sd ta0,0(a0) +- beqz a3,.L_bn_sub_words_return +- +- ld t1,8(a1) +- subu a3,1 +- ld ta1,8(a2) +- sltu t9,t1,ta1 +- dsubu t1,ta1 +- dsubu ta1,t1,v0 +- MOVNZ (t1,v0,t9) +- sd ta1,8(a0) +- beqz a3,.L_bn_sub_words_return +- +- ld t2,16(a1) +- ld ta2,16(a2) +- sltu t8,t2,ta2 +- dsubu t2,ta2 +- dsubu ta2,t2,v0 +- MOVNZ (t2,v0,t8) +- sd ta2,16(a0) +- jr ra +-END(bn_sub_words) +- +-#undef MINUS4 +- +-.align 5 +-LEAF(bn_div_3_words) +- .set reorder +- move a3,a0 /* we know that bn_div_words doesn't +- * touch a3, ta2, ta3 and preserves a2 +- * so that we can save two arguments +- * and return address in registers +- * instead of stack:-) +- */ +- ld a0,(a3) +- move ta2,a1 +- ld a1,-8(a3) +- bne a0,a2,.L_bn_div_3_words_proceed +- li v0,-1 +- jr ra +-.L_bn_div_3_words_proceed: +- move ta3,ra +- bal bn_div_words +- move ra,ta3 +- dmultu ta2,v0 +- 
ld t2,-16(a3) +- move ta0,zero +- mfhi t1 +- mflo t0 +- sltu t8,t1,v1 +-.L_bn_div_3_words_inner_loop: +- bnez t8,.L_bn_div_3_words_inner_loop_done +- sgeu AT,t2,t0 +- seq t9,t1,v1 +- and AT,t9 +- sltu t3,t0,ta2 +- daddu v1,a2 +- dsubu t1,t3 +- dsubu t0,ta2 +- sltu t8,t1,v1 +- sltu ta0,v1,a2 +- or t8,ta0 +- .set noreorder +- beqzl AT,.L_bn_div_3_words_inner_loop +- dsubu v0,1 +- .set reorder +-.L_bn_div_3_words_inner_loop_done: +- jr ra +-END(bn_div_3_words) +- +-.align 5 +-LEAF(bn_div_words) +- .set noreorder +- bnezl a2,.L_bn_div_words_proceed +- move v1,zero +- jr ra +- li v0,-1 /* I'd rather signal div-by-zero +- * which can be done with 'break 7' */ +- +-.L_bn_div_words_proceed: +- bltz a2,.L_bn_div_words_body +- move t9,v1 +- dsll a2,1 +- bgtz a2,.-4 +- addu t9,1 +- +- .set reorder +- negu t1,t9 +- li t2,-1 +- dsll t2,t1 +- and t2,a0 +- dsrl AT,a1,t1 +- .set noreorder +- bnezl t2,.+8 +- break 6 /* signal overflow */ +- .set reorder +- dsll a0,t9 +- dsll a1,t9 +- or a0,AT +- +-#define QT ta0 +-#define HH ta1 +-#define DH v1 +-.L_bn_div_words_body: +- dsrl DH,a2,32 +- sgeu AT,a0,a2 +- .set noreorder +- bnezl AT,.+8 +- dsubu a0,a2 +- .set reorder +- +- li QT,-1 +- dsrl HH,a0,32 +- dsrl QT,32 /* q=0xffffffff */ +- beq DH,HH,.L_bn_div_words_skip_div1 +- ddivu zero,a0,DH +- mflo QT +-.L_bn_div_words_skip_div1: +- dmultu a2,QT +- dsll t3,a0,32 +- dsrl AT,a1,32 +- or t3,AT +- mflo t0 +- mfhi t1 +-.L_bn_div_words_inner_loop1: +- sltu t2,t3,t0 +- seq t8,HH,t1 +- sltu AT,HH,t1 +- and t2,t8 +- sltu v0,t0,a2 +- or AT,t2 +- .set noreorder +- beqz AT,.L_bn_div_words_inner_loop1_done +- dsubu t1,v0 +- dsubu t0,a2 +- b .L_bn_div_words_inner_loop1 +- dsubu QT,1 +- .set reorder +-.L_bn_div_words_inner_loop1_done: +- +- dsll a1,32 +- dsubu a0,t3,t0 +- dsll v0,QT,32 +- +- li QT,-1 +- dsrl HH,a0,32 +- dsrl QT,32 /* q=0xffffffff */ +- beq DH,HH,.L_bn_div_words_skip_div2 +- ddivu zero,a0,DH +- mflo QT +-.L_bn_div_words_skip_div2: +-#undef DH +- dmultu a2,QT +- dsll t3,a0,32 +- dsrl 
AT,a1,32 +- or t3,AT +- mflo t0 +- mfhi t1 +-.L_bn_div_words_inner_loop2: +- sltu t2,t3,t0 +- seq t8,HH,t1 +- sltu AT,HH,t1 +- and t2,t8 +- sltu v1,t0,a2 +- or AT,t2 +- .set noreorder +- beqz AT,.L_bn_div_words_inner_loop2_done +- dsubu t1,v1 +- dsubu t0,a2 +- b .L_bn_div_words_inner_loop2 +- dsubu QT,1 +- .set reorder +-.L_bn_div_words_inner_loop2_done: +-#undef HH +- +- dsubu a0,t3,t0 +- or v0,QT +- dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ +- dsrl a2,t9 /* restore a2 */ +- jr ra +-#undef QT +-END(bn_div_words) +- +-#define a_0 t0 +-#define a_1 t1 +-#define a_2 t2 +-#define a_3 t3 +-#define b_0 ta0 +-#define b_1 ta1 +-#define b_2 ta2 +-#define b_3 ta3 +- +-#define a_4 s0 +-#define a_5 s2 +-#define a_6 s4 +-#define a_7 a1 /* once we load a[7] we don't need a anymore */ +-#define b_4 s1 +-#define b_5 s3 +-#define b_6 s5 +-#define b_7 a2 /* once we load b[7] we don't need b anymore */ +- +-#define t_1 t8 +-#define t_2 t9 +- +-#define c_1 v0 +-#define c_2 v1 +-#define c_3 a3 +- +-#define FRAME_SIZE 48 +- +-.align 5 +-LEAF(bn_mul_comba8) +- .set noreorder +- PTR_SUB sp,FRAME_SIZE +- .frame sp,64,ra +- .set reorder +- ld a_0,0(a1) /* If compiled with -mips3 option on +- * R5000 box assembler barks on this +- * line with "shouldn't have mult/div +- * as last instruction in bb (R10K +- * bug)" warning. If anybody out there +- * has a clue about how to circumvent +- * this do send me a note. 
+- * +- */ +- ld b_0,0(a2) +- ld a_1,8(a1) +- ld a_2,16(a1) +- ld a_3,24(a1) +- ld b_1,8(a2) +- ld b_2,16(a2) +- ld b_3,24(a2) +- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ +- sd s0,0(sp) +- sd s1,8(sp) +- sd s2,16(sp) +- sd s3,24(sp) +- sd s4,32(sp) +- sd s5,40(sp) +- mflo c_1 +- mfhi c_2 +- +- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ +- ld a_4,32(a1) +- ld a_5,40(a1) +- ld a_6,48(a1) +- ld a_7,56(a1) +- ld b_4,32(a2) +- ld b_5,40(a2) +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu c_3,t_2,AT +- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ +- ld b_6,48(a2) +- ld b_7,56(a2) +- sd c_1,0(a0) /* r[0]=c1; */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- sd c_2,8(a0) /* r[1]=c2; */ +- +- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,16(a0) /* r[2]=c3; */ +- +- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu c_3,c_2,t_2 +- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu 
t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,24(a0) /* r[3]=c1; */ +- +- dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,32(a0) /* r[4]=c2; */ +- +- dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ +- mflo t_1 +- 
mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,40(a0) /* r[5]=c3; */ +- +- dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu c_3,c_2,t_2 +- dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,48(a0) /* r[6]=c1; */ +- +- dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu 
a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,56(a0) /* r[7]=c2; */ +- +- dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu 
AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,64(a0) /* r[8]=c3; */ +- +- dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu c_3,c_2,t_2 +- dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,72(a0) /* r[9]=c1; */ +- +- dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- 
sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,80(a0) /* r[10]=c2; */ +- +- dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,88(a0) /* r[11]=c3; */ +- +- dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu c_3,c_2,t_2 +- dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,96(a0) /* r[12]=c1; */ +- +- dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,104(a0) /* 
r[13]=c2; */ +- +- dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ +- ld s0,0(sp) +- ld s1,8(sp) +- ld s2,16(sp) +- ld s3,24(sp) +- ld s4,32(sp) +- ld s5,40(sp) +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sd c_3,112(a0) /* r[14]=c3; */ +- sd c_1,120(a0) /* r[15]=c1; */ +- +- PTR_ADD sp,FRAME_SIZE +- +- jr ra +-END(bn_mul_comba8) +- +-.align 5 +-LEAF(bn_mul_comba4) +- .set reorder +- ld a_0,0(a1) +- ld b_0,0(a2) +- ld a_1,8(a1) +- ld a_2,16(a1) +- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ +- ld a_3,24(a1) +- ld b_1,8(a2) +- ld b_2,16(a2) +- ld b_3,24(a2) +- mflo c_1 +- mfhi c_2 +- sd c_1,0(a0) +- +- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu c_3,t_2,AT +- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- sd c_2,8(a0) +- +- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,16(a0) +- +- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu c_3,c_2,t_2 +- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- 
daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,24(a0) +- +- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu c_1,c_3,t_2 +- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,32(a0) +- +- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu c_2,c_1,t_2 +- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,40(a0) +- +- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sd c_1,48(a0) +- sd c_2,56(a0) +- +- jr ra +-END(bn_mul_comba4) +- +-#undef a_4 +-#undef a_5 +-#undef a_6 +-#undef a_7 +-#define a_4 b_0 +-#define a_5 b_1 +-#define a_6 b_2 +-#define a_7 b_3 +- +-.align 5 +-LEAF(bn_sqr_comba8) +- .set reorder +- ld a_0,0(a1) +- ld a_1,8(a1) +- ld a_2,16(a1) +- ld a_3,24(a1) +- +- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ +- ld a_4,32(a1) +- ld a_5,40(a1) +- ld a_6,48(a1) +- ld a_7,56(a1) +- mflo c_1 +- mfhi c_2 +- sd c_1,0(a0) +- +- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu 
AT,c_2,t_1 +- daddu c_3,t_2,AT +- sd c_2,8(a0) +- +- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,16(a0) +- +- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt c_3,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,24(a0) +- +- dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_1,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,32(a0) +- +- dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll 
t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_2,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_2,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,40(a0) +- +- dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt c_3,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,48(a0) +- +- dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- 
dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_1,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_1,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_1,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,56(a0) +- +- dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_2,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_2,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,64(a0) +- +- dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt c_3,t_2,zero +- 
dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,72(a0) +- +- dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_1,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,80(a0) +- +- dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_2,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- 
daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,88(a0) +- +- dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt c_3,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,96(a0) +- +- dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,104(a0) +- +- dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sd c_3,112(a0) +- sd c_1,120(a0) +- +- jr ra +-END(bn_sqr_comba8) +- +-.align 5 +-LEAF(bn_sqr_comba4) +- .set reorder +- ld a_0,0(a1) +- ld a_1,8(a1) +- ld a_2,16(a1) +- ld a_3,24(a1) +- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ +- mflo c_1 +- mfhi c_2 +- sd c_1,0(a0) +- +- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu c_3,t_2,AT +- sd c_2,8(a0) +- +- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,16(a0) +- +- dmultu a_0,a_3 /* 
mul_add_c2(a[0],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt c_3,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- slt AT,t_2,zero +- daddu c_3,AT +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sltu AT,c_2,t_2 +- daddu c_3,AT +- sd c_1,24(a0) +- +- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- slt c_1,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ +- mflo t_1 +- mfhi t_2 +- daddu c_2,t_1 +- sltu AT,c_2,t_1 +- daddu t_2,AT +- daddu c_3,t_2 +- sltu AT,c_3,t_2 +- daddu c_1,AT +- sd c_2,32(a0) +- +- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ +- mflo t_1 +- mfhi t_2 +- slt c_2,t_2,zero +- dsll t_2,1 +- slt a2,t_1,zero +- daddu t_2,a2 +- dsll t_1,1 +- daddu c_3,t_1 +- sltu AT,c_3,t_1 +- daddu t_2,AT +- daddu c_1,t_2 +- sltu AT,c_1,t_2 +- daddu c_2,AT +- sd c_3,40(a0) +- +- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ +- mflo t_1 +- mfhi t_2 +- daddu c_1,t_1 +- sltu AT,c_1,t_1 +- daddu t_2,AT +- daddu c_2,t_2 +- sd c_1,48(a0) +- sd c_2,56(a0) +- +- jr ra +-END(bn_sqr_comba4) +diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c +index 31476ab..2d39407 100644 +--- a/crypto/bn/asm/x86_64-gcc.c ++++ b/crypto/bn/asm/x86_64-gcc.c +@@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) + /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ + /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ + ++/* ++ * Keep in mind that carrying into high part 
of multiplication result ++ * can not overflow, because it cannot be all-ones. ++ */ + #if 0 + /* original macros are kept for reference purposes */ + #define mul_add_c(a,b,c0,c1,c2) { \ +@@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) + BN_ULONG ta=(a),tb=(b),t0; \ + t1 = BN_UMULT_HIGH(ta,tb); \ + t0 = ta * tb; \ +- t2 = t1+t1; c2 += (t2neg=rand_neg(); ++ BN_sqr(c,a,ctx); + if (bp != NULL) + { + if (!results) + { +- BN_print(bp,&a); ++ BN_print(bp,a); + BIO_puts(bp," * "); +- BN_print(bp,&a); ++ BN_print(bp,a); + BIO_puts(bp," - "); + } +- BN_print(bp,&c); ++ BN_print(bp,c); + BIO_puts(bp,"\n"); + } +- BN_div(&d,&e,&c,&a,ctx); +- BN_sub(&d,&d,&a); +- if(!BN_is_zero(&d) || !BN_is_zero(&e)) +- { +- fprintf(stderr,"Square test failed!\n"); +- return 0; +- } ++ BN_div(d,e,c,a,ctx); ++ BN_sub(d,d,a); ++ if(!BN_is_zero(d) || !BN_is_zero(e)) ++ { ++ fprintf(stderr,"Square test failed!\n"); ++ goto err; ++ } + } +- BN_free(&a); +- BN_free(&c); +- BN_free(&d); +- BN_free(&e); +- return(1); ++ ++ /* Regression test for a BN_sqr overflow bug. */ ++ BN_hex2bn(&a, ++ "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000"); ++ BN_sqr(c, a, ctx); ++ if (bp != NULL) ++ { ++ if (!results) ++ { ++ BN_print(bp,a); ++ BIO_puts(bp," * "); ++ BN_print(bp,a); ++ BIO_puts(bp," - "); ++ } ++ BN_print(bp,c); ++ BIO_puts(bp,"\n"); ++ } ++ BN_mul(d, a, a, ctx); ++ if (BN_cmp(c, d)) ++ { ++ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " ++ "different results!\n"); ++ goto err; ++ } ++ ++ /* Regression test for a BN_sqr overflow bug. 
*/ ++ BN_hex2bn(&a, ++ "80000000000000000000000080000001FFFFFFFE000000000000000000000000"); ++ BN_sqr(c, a, ctx); ++ if (bp != NULL) ++ { ++ if (!results) ++ { ++ BN_print(bp,a); ++ BIO_puts(bp," * "); ++ BN_print(bp,a); ++ BIO_puts(bp," - "); ++ } ++ BN_print(bp,c); ++ BIO_puts(bp,"\n"); ++ } ++ BN_mul(d, a, a, ctx); ++ if (BN_cmp(c, d)) ++ { ++ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " ++ "different results!\n"); ++ goto err; ++ } ++ ret = 1; ++err: ++ if (a != NULL) BN_free(a); ++ if (c != NULL) BN_free(c); ++ if (d != NULL) BN_free(d); ++ if (e != NULL) BN_free(e); ++ return ret; + } + + int test_mont(BIO *bp, BN_CTX *ctx) +-- +1.8.3.1 + diff --git a/SOURCES/openssl-1.0.1e-cve-2014-3571.patch b/SOURCES/openssl-1.0.1e-cve-2014-3571.patch new file mode 100644 index 0000000..9c93b7f --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2014-3571.patch @@ -0,0 +1,34 @@ +diff -up openssl-1.0.1e/ssl/d1_pkt.c.dtls1-reads openssl-1.0.1e/ssl/d1_pkt.c +--- openssl-1.0.1e/ssl/d1_pkt.c.dtls1-reads 2015-01-12 17:31:41.647213706 +0100 ++++ openssl-1.0.1e/ssl/d1_pkt.c 2015-01-12 17:38:21.708261411 +0100 +@@ -641,8 +641,6 @@ again: + /* now s->packet_length == DTLS1_RT_HEADER_LENGTH */ + i=rr->length; + n=ssl3_read_n(s,i,i,1); +- if (n <= 0) return(n); /* error or non-blocking io */ +- + /* this packet contained a partial record, dump it */ + if ( n != i) + { +@@ -677,7 +675,8 @@ again: + * would be dropped unnecessarily. 
+ */ + if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE && +- *p == SSL3_MT_CLIENT_HELLO) && ++ s->packet_length > DTLS1_RT_HEADER_LENGTH && ++ s->packet[DTLS1_RT_HEADER_LENGTH] == SSL3_MT_CLIENT_HELLO) && + !dtls1_record_replay_check(s, bitmap)) + { + rr->length = 0; +diff -up openssl-1.0.1e/ssl/s3_pkt.c.dtls1-reads openssl-1.0.1e/ssl/s3_pkt.c +--- openssl-1.0.1e/ssl/s3_pkt.c.dtls1-reads 2015-01-12 17:31:41.680214453 +0100 ++++ openssl-1.0.1e/ssl/s3_pkt.c 2015-01-12 17:38:06.721922482 +0100 +@@ -182,6 +182,8 @@ int ssl3_read_n(SSL *s, int n, int max, + * at once (as long as it fits into the buffer). */ + if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) + { ++ if (left == 0 && extend) ++ return 0; + if (left > 0 && n > left) + n = left; + } diff --git a/SOURCES/openssl-1.0.1e-cve-2014-3572.patch b/SOURCES/openssl-1.0.1e-cve-2014-3572.patch new file mode 100644 index 0000000..013876b --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2014-3572.patch @@ -0,0 +1,51 @@ +diff -up openssl-1.0.1e/ssl/s3_clnt.c.ecdh-downgrade openssl-1.0.1e/ssl/s3_clnt.c +--- openssl-1.0.1e/ssl/s3_clnt.c.ecdh-downgrade 2015-01-12 16:37:49.978126895 +0100 ++++ openssl-1.0.1e/ssl/s3_clnt.c 2015-01-12 17:02:01.740959687 +0100 +@@ -1287,6 +1287,8 @@ int ssl3_get_key_exchange(SSL *s) + int encoded_pt_len = 0; + #endif + ++ EVP_MD_CTX_init(&md_ctx); ++ + /* use same message size as in ssl3_get_certificate_request() + * as ServerKeyExchange message may be skipped */ + n=s->method->ssl_get_message(s, +@@ -1297,14 +1299,26 @@ int ssl3_get_key_exchange(SSL *s) + &ok); + if (!ok) return((int)n); + ++ alg_k=s->s3->tmp.new_cipher->algorithm_mkey; ++ + if (s->s3->tmp.message_type != SSL3_MT_SERVER_KEY_EXCHANGE) + { ++ /* ++ * Can't skip server key exchange if this is an ephemeral ++ * ciphersuite. 
++ */ ++ if (alg_k & (SSL_kEDH|SSL_kEECDH)) ++ { ++ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE); ++ al = SSL_AD_UNEXPECTED_MESSAGE; ++ goto f_err; ++ } + #ifndef OPENSSL_NO_PSK + /* In plain PSK ciphersuite, ServerKeyExchange can be + omitted if no identity hint is sent. Set + session->sess_cert anyway to avoid problems + later.*/ +- if (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK) ++ if (alg_k & SSL_kPSK) + { + s->session->sess_cert=ssl_sess_cert_new(); + if (s->ctx->psk_identity_hint) +@@ -1347,9 +1361,8 @@ int ssl3_get_key_exchange(SSL *s) + } + + param_len=0; +- alg_k=s->s3->tmp.new_cipher->algorithm_mkey; ++ + alg_a=s->s3->tmp.new_cipher->algorithm_auth; +- EVP_MD_CTX_init(&md_ctx); + + #ifndef OPENSSL_NO_PSK + if (alg_k & SSL_kPSK) diff --git a/SOURCES/openssl-1.0.1e-cve-2014-8275.patch b/SOURCES/openssl-1.0.1e-cve-2014-8275.patch new file mode 100644 index 0000000..91041ac --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2014-8275.patch @@ -0,0 +1,202 @@ +diff -up openssl-1.0.1e/crypto/asn1/a_bitstr.c.cert-fingerprint openssl-1.0.1e/crypto/asn1/a_bitstr.c +--- openssl-1.0.1e/crypto/asn1/a_bitstr.c.cert-fingerprint 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/asn1/a_bitstr.c 2015-01-13 12:23:36.090478923 +0100 +@@ -136,11 +136,16 @@ ASN1_BIT_STRING *c2i_ASN1_BIT_STRING(ASN + + p= *pp; + i= *(p++); ++ if (i > 7) ++ { ++ i=ASN1_R_INVALID_BIT_STRING_BITS_LEFT; ++ goto err; ++ } + /* We do this to preserve the settings. 
If we modify + * the settings, via the _set_bit function, we will recalculate + * on output */ + ret->flags&= ~(ASN1_STRING_FLAG_BITS_LEFT|0x07); /* clear */ +- ret->flags|=(ASN1_STRING_FLAG_BITS_LEFT|(i&0x07)); /* set */ ++ ret->flags|=(ASN1_STRING_FLAG_BITS_LEFT|i); /* set */ + + if (len-- > 1) /* using one because of the bits left byte */ + { +diff -up openssl-1.0.1e/crypto/asn1/asn1_err.c.cert-fingerprint openssl-1.0.1e/crypto/asn1/asn1_err.c +--- openssl-1.0.1e/crypto/asn1/asn1_err.c.cert-fingerprint 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/asn1/asn1_err.c 2015-01-13 12:23:36.090478923 +0100 +@@ -246,6 +246,7 @@ static ERR_STRING_DATA ASN1_str_reasons[ + {ERR_REASON(ASN1_R_ILLEGAL_TIME_VALUE) ,"illegal time value"}, + {ERR_REASON(ASN1_R_INTEGER_NOT_ASCII_FORMAT),"integer not ascii format"}, + {ERR_REASON(ASN1_R_INTEGER_TOO_LARGE_FOR_LONG),"integer too large for long"}, ++{ERR_REASON(ASN1_R_INVALID_BIT_STRING_BITS_LEFT),"invalid bit string bits left"}, + {ERR_REASON(ASN1_R_INVALID_BMPSTRING_LENGTH),"invalid bmpstring length"}, + {ERR_REASON(ASN1_R_INVALID_DIGIT) ,"invalid digit"}, + {ERR_REASON(ASN1_R_INVALID_MIME_TYPE) ,"invalid mime type"}, +diff -up openssl-1.0.1e/crypto/asn1/asn1.h.cert-fingerprint openssl-1.0.1e/crypto/asn1/asn1.h +--- openssl-1.0.1e/crypto/asn1/asn1.h.cert-fingerprint 2015-01-13 11:44:11.999013082 +0100 ++++ openssl-1.0.1e/crypto/asn1/asn1.h 2015-01-13 12:23:36.090478923 +0100 +@@ -776,7 +776,7 @@ DECLARE_ASN1_FUNCTIONS_fname(ASN1_TYPE, + int ASN1_TYPE_get(ASN1_TYPE *a); + void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value); + int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value); +-int ASN1_TYPE_cmp(ASN1_TYPE *a, ASN1_TYPE *b); ++int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b); + + ASN1_OBJECT * ASN1_OBJECT_new(void ); + void ASN1_OBJECT_free(ASN1_OBJECT *a); +@@ -1329,6 +1329,7 @@ void ERR_load_ASN1_strings(void); + #define ASN1_R_ILLEGAL_TIME_VALUE 184 + #define 
ASN1_R_INTEGER_NOT_ASCII_FORMAT 185 + #define ASN1_R_INTEGER_TOO_LARGE_FOR_LONG 128 ++#define ASN1_R_INVALID_BIT_STRING_BITS_LEFT 220 + #define ASN1_R_INVALID_BMPSTRING_LENGTH 129 + #define ASN1_R_INVALID_DIGIT 130 + #define ASN1_R_INVALID_MIME_TYPE 205 +diff -up openssl-1.0.1e/crypto/asn1/a_type.c.cert-fingerprint openssl-1.0.1e/crypto/asn1/a_type.c +--- openssl-1.0.1e/crypto/asn1/a_type.c.cert-fingerprint 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/asn1/a_type.c 2015-01-13 12:43:36.779633480 +0100 +@@ -113,7 +113,7 @@ IMPLEMENT_STACK_OF(ASN1_TYPE) + IMPLEMENT_ASN1_SET_OF(ASN1_TYPE) + + /* Returns 0 if they are equal, != 0 otherwise. */ +-int ASN1_TYPE_cmp(ASN1_TYPE *a, ASN1_TYPE *b) ++int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b) + { + int result = -1; + +diff -up openssl-1.0.1e/crypto/asn1/a_verify.c.cert-fingerprint openssl-1.0.1e/crypto/asn1/a_verify.c +--- openssl-1.0.1e/crypto/asn1/a_verify.c.cert-fingerprint 2015-01-13 11:44:12.308020070 +0100 ++++ openssl-1.0.1e/crypto/asn1/a_verify.c 2015-01-13 11:44:12.413022445 +0100 +@@ -93,6 +93,12 @@ int ASN1_verify(i2d_of_void *i2d, X509_A + ASN1err(ASN1_F_ASN1_VERIFY,ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM); + goto err; + } ++ ++ if (signature->type == V_ASN1_BIT_STRING && signature->flags & 0x7) ++ { ++ ASN1err(ASN1_F_ASN1_VERIFY, ASN1_R_INVALID_BIT_STRING_BITS_LEFT); ++ goto err; ++ } + + inl=i2d(data,NULL); + buf_in=OPENSSL_malloc((unsigned int)inl); +@@ -149,6 +155,12 @@ int ASN1_item_verify(const ASN1_ITEM *it + return -1; + } + ++ if (signature->type == V_ASN1_BIT_STRING && signature->flags & 0x7) ++ { ++ ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ASN1_R_INVALID_BIT_STRING_BITS_LEFT); ++ return -1; ++ } ++ + EVP_MD_CTX_init(&ctx); + + /* Convert signature OID into digest and public key OIDs */ +diff -up openssl-1.0.1e/crypto/asn1/x_algor.c.cert-fingerprint openssl-1.0.1e/crypto/asn1/x_algor.c +--- openssl-1.0.1e/crypto/asn1/x_algor.c.cert-fingerprint 2013-02-11 16:26:04.000000000 +0100 
++++ openssl-1.0.1e/crypto/asn1/x_algor.c 2015-01-13 12:43:36.780633502 +0100 +@@ -142,3 +142,14 @@ void X509_ALGOR_set_md(X509_ALGOR *alg, + X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); + + } ++ ++int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b) ++ { ++ int rv; ++ rv = OBJ_cmp(a->algorithm, b->algorithm); ++ if (rv) ++ return rv; ++ if (!a->parameter && !b->parameter) ++ return 0; ++ return ASN1_TYPE_cmp(a->parameter, b->parameter); ++ } +diff -up openssl-1.0.1e/crypto/dsa/dsa_asn1.c.cert-fingerprint openssl-1.0.1e/crypto/dsa/dsa_asn1.c +--- openssl-1.0.1e/crypto/dsa/dsa_asn1.c.cert-fingerprint 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/dsa/dsa_asn1.c 2015-01-13 11:44:12.414022468 +0100 +@@ -176,13 +176,25 @@ int DSA_verify(int type, const unsigned + const unsigned char *sigbuf, int siglen, DSA *dsa) + { + DSA_SIG *s; ++ const unsigned char *p = sigbuf; ++ unsigned char *der = NULL; ++ int derlen = -1; + int ret=-1; + + s = DSA_SIG_new(); + if (s == NULL) return(ret); +- if (d2i_DSA_SIG(&s,&sigbuf,siglen) == NULL) goto err; ++ if (d2i_DSA_SIG(&s,&p,siglen) == NULL) goto err; ++ /* Ensure signature uses DER and doesn't have trailing garbage */ ++ derlen = i2d_DSA_SIG(s, &der); ++ if (derlen != siglen || memcmp(sigbuf, der, derlen)) ++ goto err; + ret=DSA_do_verify(dgst,dgst_len,s,dsa); + err: ++ if (derlen > 0) ++ { ++ OPENSSL_cleanse(der, derlen); ++ OPENSSL_free(der); ++ } + DSA_SIG_free(s); + return(ret); + } +diff -up openssl-1.0.1e/crypto/ecdsa/ecs_vrf.c.cert-fingerprint openssl-1.0.1e/crypto/ecdsa/ecs_vrf.c +--- openssl-1.0.1e/crypto/ecdsa/ecs_vrf.c.cert-fingerprint 2013-02-11 16:02:48.000000000 +0100 ++++ openssl-1.0.1e/crypto/ecdsa/ecs_vrf.c 2015-01-13 11:44:12.414022468 +0100 +@@ -57,6 +57,7 @@ + */ + + #include "ecs_locl.h" ++#include "cryptlib.h" + #ifndef OPENSSL_NO_ENGINE + #include + #endif +@@ -84,13 +85,25 @@ int ECDSA_verify(int type, const unsigne + const unsigned char *sigbuf, int sig_len, 
EC_KEY *eckey) + { + ECDSA_SIG *s; ++ const unsigned char *p = sigbuf; ++ unsigned char *der = NULL; ++ int derlen = -1; + int ret=-1; + + s = ECDSA_SIG_new(); + if (s == NULL) return(ret); +- if (d2i_ECDSA_SIG(&s, &sigbuf, sig_len) == NULL) goto err; ++ if (d2i_ECDSA_SIG(&s, &p, sig_len) == NULL) goto err; ++ /* Ensure signature uses DER and doesn't have trailing garbage */ ++ derlen = i2d_ECDSA_SIG(s, &der); ++ if (derlen != sig_len || memcmp(sigbuf, der, derlen)) ++ goto err; + ret=ECDSA_do_verify(dgst, dgst_len, s, eckey); + err: ++ if (derlen > 0) ++ { ++ OPENSSL_cleanse(der, derlen); ++ OPENSSL_free(der); ++ } + ECDSA_SIG_free(s); + return(ret); + } +diff -up openssl-1.0.1e/crypto/x509/x_all.c.cert-fingerprint openssl-1.0.1e/crypto/x509/x_all.c +--- openssl-1.0.1e/crypto/x509/x_all.c.cert-fingerprint 2015-01-13 11:44:12.330020568 +0100 ++++ openssl-1.0.1e/crypto/x509/x_all.c 2015-01-13 11:44:12.414022468 +0100 +@@ -72,6 +72,8 @@ + + int X509_verify(X509 *a, EVP_PKEY *r) + { ++ if (X509_ALGOR_cmp(a->sig_alg, a->cert_info->signature)) ++ return 0; + return(ASN1_item_verify(ASN1_ITEM_rptr(X509_CINF),a->sig_alg, + a->signature,a->cert_info,r)); + } +diff -up openssl-1.0.1e/crypto/x509/x509.h.cert-fingerprint openssl-1.0.1e/crypto/x509/x509.h +--- openssl-1.0.1e/crypto/x509/x509.h.cert-fingerprint 2015-01-13 11:44:12.126015954 +0100 ++++ openssl-1.0.1e/crypto/x509/x509.h 2015-01-13 12:43:36.780633502 +0100 +@@ -768,6 +768,7 @@ int X509_ALGOR_set0(X509_ALGOR *alg, ASN + void X509_ALGOR_get0(ASN1_OBJECT **paobj, int *pptype, void **ppval, + X509_ALGOR *algor); + void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); ++int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b); + + X509_NAME *X509_NAME_dup(X509_NAME *xn); + X509_NAME_ENTRY *X509_NAME_ENTRY_dup(X509_NAME_ENTRY *ne); diff --git a/SOURCES/openssl-1.0.1e-cve-2015-0204.patch b/SOURCES/openssl-1.0.1e-cve-2015-0204.patch new file mode 100644 index 0000000..527f7c5 --- /dev/null +++ 
b/SOURCES/openssl-1.0.1e-cve-2015-0204.patch @@ -0,0 +1,158 @@ +diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod.rsa-ephemeral openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod +--- openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod.rsa-ephemeral 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_options.pod 2015-01-13 11:15:25.096957795 +0100 +@@ -151,15 +151,7 @@ temporary/ephemeral DH parameters are us + + =item SSL_OP_EPHEMERAL_RSA + +-Always use ephemeral (temporary) RSA key when doing RSA operations +-(see L). +-According to the specifications this is only done, when a RSA key +-can only be used for signature operations (namely under export ciphers +-with restricted RSA keylength). By setting this option, ephemeral +-RSA keys are always used. This option breaks compatibility with the +-SSL/TLS specifications and may lead to interoperability problems with +-clients and should therefore never be used. Ciphers with EDH (ephemeral +-Diffie-Hellman) key exchange should be used instead. ++This option is no longer implemented and is treated as no op. + + =item SSL_OP_CIPHER_SERVER_PREFERENCE + +diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod.rsa-ephemeral openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod +--- openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod.rsa-ephemeral 2013-02-11 16:02:48.000000000 +0100 ++++ openssl-1.0.1e/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod 2015-01-13 11:15:25.096957795 +0100 +@@ -74,21 +74,14 @@ exchange and use EDH (Ephemeral Diffie-H + in order to achieve forward secrecy (see + L). + +-On OpenSSL servers ephemeral RSA key exchange is therefore disabled by default +-and must be explicitly enabled using the SSL_OP_EPHEMERAL_RSA option of +-L, violating the TLS/SSL +-standard. When ephemeral RSA key exchange is required for export ciphers, +-it will automatically be used without this option! 
+- +-An application may either directly specify the key or can supply the key via +-a callback function. The callback approach has the advantage, that the +-callback may generate the key only in case it is actually needed. As the +-generation of a RSA key is however costly, it will lead to a significant +-delay in the handshake procedure. Another advantage of the callback function +-is that it can supply keys of different size (e.g. for SSL_OP_EPHEMERAL_RSA +-usage) while the explicit setting of the key is only useful for key size of +-512 bits to satisfy the export restricted ciphers and does give away key length +-if a longer key would be allowed. ++An application may either directly specify the key or can supply the key via a ++callback function. The callback approach has the advantage, that the callback ++may generate the key only in case it is actually needed. As the generation of a ++RSA key is however costly, it will lead to a significant delay in the handshake ++procedure. Another advantage of the callback function is that it can supply ++keys of different size while the explicit setting of the key is only useful for ++key size of 512 bits to satisfy the export restricted ciphers and does give ++away key length if a longer key would be allowed. + + The B is called with the B needed and + the B information. 
The B flag is set, when the +diff -up openssl-1.0.1e/CHANGES.rsa-ephemeral openssl-1.0.1e/CHANGES +diff -up openssl-1.0.1e/ssl/d1_srvr.c.rsa-ephemeral openssl-1.0.1e/ssl/d1_srvr.c +--- openssl-1.0.1e/ssl/d1_srvr.c.rsa-ephemeral 2015-01-12 17:49:04.912808002 +0100 ++++ openssl-1.0.1e/ssl/d1_srvr.c 2015-01-13 11:15:25.096957795 +0100 +@@ -450,24 +450,15 @@ int dtls1_accept(SSL *s) + case SSL3_ST_SW_KEY_EXCH_B: + alg_k = s->s3->tmp.new_cipher->algorithm_mkey; + +- /* clear this, it may get reset by +- * send_server_key_exchange */ +- if ((s->options & SSL_OP_EPHEMERAL_RSA) +-#ifndef OPENSSL_NO_KRB5 +- && !(alg_k & SSL_kKRB5) +-#endif /* OPENSSL_NO_KRB5 */ +- ) +- /* option SSL_OP_EPHEMERAL_RSA sends temporary RSA key +- * even when forbidden by protocol specs +- * (handshake may fail as clients are not required to +- * be able to handle this) */ +- s->s3->tmp.use_rsa_tmp=1; +- else +- s->s3->tmp.use_rsa_tmp=0; ++ /* ++ * clear this, it may get reset by ++ * send_server_key_exchange ++ */ ++ s->s3->tmp.use_rsa_tmp=0; + + /* only send if a DH key exchange or + * RSA but we have a sign only certificate */ +- if (s->s3->tmp.use_rsa_tmp ++ if (0 + /* PSK: send ServerKeyExchange if PSK identity + * hint if provided */ + #ifndef OPENSSL_NO_PSK +diff -up openssl-1.0.1e/ssl/ssl.h.rsa-ephemeral openssl-1.0.1e/ssl/ssl.h +--- openssl-1.0.1e/ssl/ssl.h.rsa-ephemeral 2015-01-12 17:49:04.936808545 +0100 ++++ openssl-1.0.1e/ssl/ssl.h 2015-01-13 11:15:25.098957840 +0100 +@@ -587,9 +587,8 @@ struct ssl_session_st + #define SSL_OP_SINGLE_ECDH_USE 0x00080000L + /* If set, always create a new key when using tmp_dh parameters */ + #define SSL_OP_SINGLE_DH_USE 0x00100000L +-/* Set to always use the tmp_rsa key when doing RSA operations, +- * even when this violates protocol specs */ +-#define SSL_OP_EPHEMERAL_RSA 0x00200000L ++/* Does nothing: retained for compatibility */ ++#define SSL_OP_EPHEMERAL_RSA 0x0 + /* Set on servers to choose the cipher according to the server's + * preferences */
+ #define SSL_OP_CIPHER_SERVER_PREFERENCE 0x00400000L +diff -up openssl-1.0.1e/ssl/s3_clnt.c.rsa-ephemeral openssl-1.0.1e/ssl/s3_clnt.c +--- openssl-1.0.1e/ssl/s3_clnt.c.rsa-ephemeral 2015-01-12 17:49:04.946808771 +0100 ++++ openssl-1.0.1e/ssl/s3_clnt.c 2015-01-13 11:15:25.097957817 +0100 +@@ -1492,6 +1492,13 @@ int ssl3_get_key_exchange(SSL *s) + #ifndef OPENSSL_NO_RSA + if (alg_k & SSL_kRSA) + { ++ /* Temporary RSA keys only allowed in export ciphersuites */ ++ if (!SSL_C_IS_EXPORT(s->s3->tmp.new_cipher)) ++ { ++ al=SSL_AD_UNEXPECTED_MESSAGE; ++ SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,SSL_R_UNEXPECTED_MESSAGE); ++ goto f_err; ++ } + if ((rsa=RSA_new()) == NULL) + { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,ERR_R_MALLOC_FAILURE); +diff -up openssl-1.0.1e/ssl/s3_srvr.c.rsa-ephemeral openssl-1.0.1e/ssl/s3_srvr.c +--- openssl-1.0.1e/ssl/s3_srvr.c.rsa-ephemeral 2015-01-12 17:51:32.044135496 +0100 ++++ openssl-1.0.1e/ssl/s3_srvr.c 2015-01-13 11:15:25.098957840 +0100 +@@ -441,20 +441,11 @@ int ssl3_accept(SSL *s) + case SSL3_ST_SW_KEY_EXCH_B: + alg_k = s->s3->tmp.new_cipher->algorithm_mkey; + +- /* clear this, it may get reset by +- * send_server_key_exchange */ +- if ((s->options & SSL_OP_EPHEMERAL_RSA) +-#ifndef OPENSSL_NO_KRB5 +- && !(alg_k & SSL_kKRB5) +-#endif /* OPENSSL_NO_KRB5 */ +- ) +- /* option SSL_OP_EPHEMERAL_RSA sends temporary RSA key +- * even when forbidden by protocol specs +- * (handshake may fail as clients are not required to +- * be able to handle this) */ +- s->s3->tmp.use_rsa_tmp=1; +- else +- s->s3->tmp.use_rsa_tmp=0; ++ /* ++ * clear this, it may get reset by ++ * send_server_key_exchange ++ */ ++ s->s3->tmp.use_rsa_tmp=0; + + + /* only send if a DH key exchange, fortezza or +@@ -468,7 +459,7 @@ int ssl3_accept(SSL *s) + * server certificate contains the server's + * public key for key exchange. 
+ */ +- if (s->s3->tmp.use_rsa_tmp ++ if (0 + /* PSK: send ServerKeyExchange if PSK identity + * hint if provided */ + #ifndef OPENSSL_NO_PSK diff --git a/SOURCES/openssl-1.0.1e-cve-2015-0205.patch b/SOURCES/openssl-1.0.1e-cve-2015-0205.patch new file mode 100644 index 0000000..ff378ae --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2015-0205.patch @@ -0,0 +1,12 @@ +diff -up openssl-1.0.1e/ssl/s3_srvr.c.dh-unauthenticated openssl-1.0.1e/ssl/s3_srvr.c +--- openssl-1.0.1e/ssl/s3_srvr.c.dh-unauthenticated 2015-01-12 17:49:04.930808409 +0100 ++++ openssl-1.0.1e/ssl/s3_srvr.c 2015-01-13 11:15:25.098957840 +0100 +@@ -2951,7 +2951,7 @@ int ssl3_get_cert_verify(SSL *s) + if (s->s3->tmp.message_type != SSL3_MT_CERTIFICATE_VERIFY) + { + s->s3->tmp.reuse_message=1; +- if ((peer != NULL) && (type & EVP_PKT_SIGN)) ++ if (peer != NULL) + { + al=SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,SSL_R_MISSING_VERIFY_MESSAGE); diff --git a/SOURCES/openssl-1.0.1e-cve-2015-0206.patch b/SOURCES/openssl-1.0.1e-cve-2015-0206.patch new file mode 100644 index 0000000..e2d1718 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cve-2015-0206.patch @@ -0,0 +1,116 @@ +diff -up openssl-1.0.1e/ssl/d1_pkt.c.dtls-recleak openssl-1.0.1e/ssl/d1_pkt.c +--- openssl-1.0.1e/ssl/d1_pkt.c.dtls-rec-leak 2015-01-13 11:44:12.410022377 +0100 ++++ openssl-1.0.1e/ssl/d1_pkt.c 2015-01-13 11:50:40.062789458 +0100 +@@ -212,7 +212,7 @@ dtls1_buffer_record(SSL *s, record_pqueu + /* Limit the size of the queue to prevent DOS attacks */ + if (pqueue_size(queue->q) >= 100) + return 0; +- ++ + rdata = OPENSSL_malloc(sizeof(DTLS1_RECORD_DATA)); + item = pitem_new(priority, rdata); + if (rdata == NULL || item == NULL) +@@ -239,14 +239,6 @@ dtls1_buffer_record(SSL *s, record_pqueu + } + #endif + +- /* insert should not fail, since duplicates are dropped */ +- if (pqueue_insert(queue->q, item) == NULL) +- { +- OPENSSL_free(rdata); +- pitem_free(item); +- return(0); +- } +- + s->packet = NULL; + s->packet_length = 0; + 
memset(&(s->s3->rbuf), 0, sizeof(SSL3_BUFFER)); +@@ -255,11 +247,24 @@ dtls1_buffer_record(SSL *s, record_pqueu + if (!ssl3_setup_buffers(s)) + { + SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR); ++ if (rdata->rbuf.buf != NULL) ++ OPENSSL_free(rdata->rbuf.buf); + OPENSSL_free(rdata); + pitem_free(item); +- return(0); ++ return(-1); + } +- ++ ++ /* insert should not fail, since duplicates are dropped */ ++ if (pqueue_insert(queue->q, item) == NULL) ++ { ++ SSLerr(SSL_F_DTLS1_BUFFER_RECORD, ERR_R_INTERNAL_ERROR); ++ if (rdata->rbuf.buf != NULL) ++ OPENSSL_free(rdata->rbuf.buf); ++ OPENSSL_free(rdata); ++ pitem_free(item); ++ return(-1); ++ } ++ + return(1); + } + +@@ -313,8 +318,9 @@ dtls1_process_buffered_records(SSL *s) + dtls1_get_unprocessed_record(s); + if ( ! dtls1_process_record(s)) + return(0); +- dtls1_buffer_record(s, &(s->d1->processed_rcds), +- s->s3->rrec.seq_num); ++ if(dtls1_buffer_record(s, &(s->d1->processed_rcds), ++ s->s3->rrec.seq_num)<0) ++ return -1; + } + } + +@@ -529,7 +535,6 @@ printf("\n"); + + /* we have pulled in a full packet so zero things */ + s->packet_length=0; +- dtls1_record_bitmap_update(s, &(s->d1->bitmap));/* Mark receipt of record. */ + return(1); + + f_err: +@@ -562,7 +567,8 @@ int dtls1_get_record(SSL *s) + + /* The epoch may have changed. If so, process all the + * pending records. This is a non-blocking operation. */ +- dtls1_process_buffered_records(s); ++ if(dtls1_process_buffered_records(s)<0) ++ return -1; + + /* if we're renegotiating, then there may be buffered records */ + if (dtls1_get_processed_record(s)) +@@ -699,7 +705,9 @@ again: + { + if ((SSL_in_init(s) || s->in_handshake) && !s->d1->listen) + { +- dtls1_buffer_record(s, &(s->d1->unprocessed_rcds), rr->seq_num); ++ if(dtls1_buffer_record(s, &(s->d1->unprocessed_rcds), rr->seq_num)<0) ++ return -1; ++ dtls1_record_bitmap_update(s, bitmap);/* Mark receipt of record. 
*/ + } + rr->length = 0; + s->packet_length = 0; +@@ -712,6 +720,7 @@ again: + s->packet_length = 0; /* dump this record */ + goto again; /* get another record */ + } ++ dtls1_record_bitmap_update(s, bitmap);/* Mark receipt of record. */ + + return(1); + +@@ -863,7 +872,11 @@ start: + * buffer the application data for later processing rather + * than dropping the connection. + */ +- dtls1_buffer_record(s, &(s->d1->buffered_app_data), rr->seq_num); ++ if(dtls1_buffer_record(s, &(s->d1->buffered_app_data), rr->seq_num)<0) ++ { ++ SSLerr(SSL_F_DTLS1_READ_BYTES, ERR_R_INTERNAL_ERROR); ++ return -1; ++ } + rr->length = 0; + goto start; + } diff --git a/SPECS/openssl.spec b/SPECS/openssl.spec index 663e74f..b01854b 100644 --- a/SPECS/openssl.spec +++ b/SPECS/openssl.spec @@ -23,7 +23,7 @@ Summary: Utilities from the general purpose cryptography library with TLS implementation Name: openssl Version: 1.0.1e -Release: 34%{?dist}.6 +Release: 34%{?dist}.7 Epoch: 1 # We have to remove certain patented algorithms from the openssl source # tarball with the hobble-openssl script which is included below. 
@@ -109,6 +109,13 @@ Patch106: openssl-1.0.1e-cve-2014-3511.patch Patch110: openssl-1.0.1e-cve-2014-3567.patch Patch111: openssl-1.0.1e-cve-2014-3513.patch Patch112: openssl-1.0.1e-fallback-scsv.patch +Patch114: openssl-1.0.1e-cve-2014-3570.patch +Patch115: openssl-1.0.1e-cve-2014-3571.patch +Patch116: openssl-1.0.1e-cve-2014-3572.patch +Patch117: openssl-1.0.1e-cve-2014-8275.patch +Patch118: openssl-1.0.1e-cve-2015-0204.patch +Patch119: openssl-1.0.1e-cve-2015-0205.patch +Patch120: openssl-1.0.1e-cve-2015-0206.patch License: OpenSSL Group: System Environment/Libraries @@ -249,6 +256,13 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/ %patch110 -p1 -b .ticket-leak %patch111 -p1 -b .srtp-leak %patch112 -p1 -b .fallback-scsv +%patch114 -p1 -b .bn-sqr +%patch115 -p1 -b .dtls1-reads +%patch116 -p1 -b .ecdh-downgrade +%patch117 -p1 -b .cert-fingerprint +%patch118 -p1 -b .rsa-ephemeral +%patch119 -p1 -b .dh-unauthenticated +%patch120 -p1 -b .dtls-rec-leak sed -i 's/SHLIB_VERSION_NUMBER "1.0.0"/SHLIB_VERSION_NUMBER "%{version}"/' crypto/opensslv.h @@ -512,6 +526,16 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.* %postun libs -p /sbin/ldconfig %changelog +* Tue Jan 13 2015 Tomáš Mráz 1.0.1e-34.7 +- fix CVE-2014-3570 - incorrect computation in BN_sqr() +- fix CVE-2014-3571 - possible crash in dtls1_get_record() +- fix CVE-2014-3572 - possible downgrade of ECDH ciphersuite to non-PFS state +- fix CVE-2014-8275 - various certificate fingerprint issues +- fix CVE-2015-0204 - remove support for RSA ephemeral keys for non-export + ciphersuites and on server +- fix CVE-2015-0205 - do not allow unauthenticated client DH certificate +- fix CVE-2015-0206 - possible memory leak when buffering DTLS records + * Wed Oct 15 2014 Tomáš Mráz 1.0.1e-34.6 - fix CVE-2014-3567 - memory leak when handling session tickets - fix CVE-2014-3513 - memory leak in srtp support