diff -up openssl-1.0.2k/crypto/bn/bn_div.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_div.c --- openssl-1.0.2k/crypto/bn/bn_div.c.rohnp-fix 2017-01-26 14:22:03.000000000 +0100 +++ openssl-1.0.2k/crypto/bn/bn_div.c 2018-08-14 10:57:21.592518702 +0200 @@ -290,6 +290,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const wnum.neg = 0; wnum.d = &(snum->d[loop]); wnum.top = div_n; + wnum.flags = BN_FLG_STATIC_DATA; /* * only needed when BN_ucmp messes up the values between top and max */ diff -up openssl-1.0.2k/crypto/bn/bn_exp.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_exp.c --- openssl-1.0.2k/crypto/bn/bn_exp.c.rohnp-fix 2017-01-26 14:22:03.000000000 +0100 +++ openssl-1.0.2k/crypto/bn/bn_exp.c 2018-08-14 10:57:21.596518798 +0200 @@ -466,17 +466,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI ret = 1; goto err; } - if (!BN_to_montgomery(val[0], aa, mont, ctx)) + if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx)) goto err; /* 1 */ window = BN_window_bits_for_exponent_size(bits); if (window > 1) { - if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) + if (!bn_mul_mont_fixed_top(d, val[0], val[0], mont, ctx)) goto err; /* 2 */ j = 1 << (window - 1); for (i = 1; i < j; i++) { if (((val[i] = BN_CTX_get(ctx)) == NULL) || - !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) + !bn_mul_mont_fixed_top(val[i], val[i - 1], d, mont, ctx)) goto err; } } @@ -498,19 +498,15 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI for (i = 1; i < j; i++) r->d[i] = (~m->d[i]) & BN_MASK2; r->top = j; - /* - * Upper words will be zero if the corresponding words of 'm' were - * 0xfff[...], so decrement r->top accordingly. - */ - bn_correct_top(r); + r->flags |= BN_FLG_FIXED_TOP; } else #endif - if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) + if (!bn_to_mont_fixed_top(r, BN_value_one(), mont, ctx)) goto err; for (;;) { if (BN_is_bit_set(p, wstart) == 0) { if (!start) { - if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) goto err; } if (wstart == 0) @@ -541,12 +537,12 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI /* add the 'bytes above' */ if (!start) for (i = 0; i < j; i++) { - if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) goto err; } /* wvalue will be an odd number < 2^window */ - if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) + if (!bn_mul_mont_fixed_top(r, r, val[wvalue >> 1], mont, ctx)) goto err; /* move the 'window' down further */ @@ -556,6 +552,11 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI if (wstart < 0) break; } + /* + * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery + * removes padding [if any] and makes return value suitable for public + * API consumer. + */ #if defined(SPARC_T4_MONT) if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { j = mont->N.top; /* borrow j */ @@ -674,7 +675,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBU } b->top = top; - bn_correct_top(b); + b->flags |= BN_FLG_FIXED_TOP; return 1; } @@ -841,16 +842,16 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr tmp.top = top; } else #endif - if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) + if (!bn_to_mont_fixed_top(&tmp, BN_value_one(), mont, ctx)) goto err; /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a, m) >= 0) { if (!BN_mod(&am, a, m, ctx)) goto err; - if (!BN_to_montgomery(&am, &am, mont, ctx)) + if (!bn_to_mont_fixed_top(&am, &am, mont, ctx)) goto err; - } else if (!BN_to_montgomery(&am, a, mont, ctx)) + } else if (!bn_to_mont_fixed_top(&am, a, mont, ctx)) goto err; #if defined(SPARC_T4_MONT) @@ -1117,14 +1118,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr * performance advantage of sqr over mul). */ if (window > 1) { - if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &am, &am, mont, ctx)) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, window)) goto err; for (i = 3; i < numPowers; i++) { /* Calculate a^i = a^(i-1) * a */ - if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &am, &tmp, mont, ctx)) goto err; if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, window)) @@ -1148,7 +1149,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr /* Scan the window, squaring the result as we go */ for (i = 0; i < window; i++, bits--) { - if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx)) goto err; wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); } @@ -1161,12 +1162,16 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr goto err; /* Multiply the result into the intermediate result */ - if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) + if (!bn_mul_mont_fixed_top(&tmp, &tmp, &am, mont, ctx)) goto err; } } - /* Convert the final result from montgomery to standard format */ + /* + * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery + * removes padding [if any] and makes return value suitable for public + * API consumer. + */ #if defined(SPARC_T4_MONT) if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { am.d[0] = 1; /* borrow am */ diff -up openssl-1.0.2k/crypto/bn/bn.h.rohnp-fix openssl-1.0.2k/crypto/bn/bn.h --- openssl-1.0.2k/crypto/bn/bn.h.rohnp-fix 2018-06-20 17:44:01.752387208 +0200 +++ openssl-1.0.2k/crypto/bn/bn.h 2018-08-14 10:57:21.592518702 +0200 @@ -702,6 +702,16 @@ BIGNUM *bn_dup_expand(const BIGNUM *a, i /* We only need assert() when debugging */ # include +/* + * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with + * bn_correct_top, in other words such vectors are permitted to have zeros + * in most significant limbs. Such vectors are used internally to achieve + * execution time invariance for critical operations with private keys. + * It's BN_DEBUG-only flag, because user application is not supposed to + * observe it anyway. Moreover, optimizing compiler would actually remove + * all operations manipulating the bit in question in non-BN_DEBUG build. + */ +# define BN_FLG_FIXED_TOP 0x10000 # ifdef BN_DEBUG_RAND /* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */ # ifndef RAND_pseudo_bytes @@ -734,8 +744,10 @@ int RAND_pseudo_bytes(unsigned char *buf do { \ const BIGNUM *_bnum2 = (a); \ if (_bnum2 != NULL) { \ - assert((_bnum2->top == 0) || \ - (_bnum2->d[_bnum2->top - 1] != 0)); \ + int _top = _bnum2->top; \ + assert((_top == 0) || \ + (_bnum2->flags & BN_FLG_FIXED_TOP) || \ + (_bnum2->d[_top - 1] != 0)); \ bn_pollute(_bnum2); \ } \ } while(0) @@ -753,6 +765,7 @@ int RAND_pseudo_bytes(unsigned char *buf # else /* !BN_DEBUG */ +# define BN_FLG_FIXED_TOP 0 # define bn_pollute(a) # define bn_check_top(a) # define bn_fix_top(a) bn_correct_top(a) diff -up openssl-1.0.2k/crypto/bn/bn_lcl.h.rohnp-fix openssl-1.0.2k/crypto/bn/bn_lcl.h --- openssl-1.0.2k/crypto/bn/bn_lcl.h.rohnp-fix 2018-06-20 17:44:01.748387114 +0200 +++ openssl-1.0.2k/crypto/bn/bn_lcl.h 2018-08-14 10:57:21.596518798 +0200 @@ -113,6 +113,7 @@ # define HEADER_BN_LCL_H # include +# include "bn_int.h" #ifdef __cplusplus extern "C" { diff -up openssl-1.0.2k/crypto/bn/bn_lib.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_lib.c --- openssl-1.0.2k/crypto/bn/bn_lib.c.rohnp-fix 2017-01-26 14:22:03.000000000 +0100 +++ openssl-1.0.2k/crypto/bn/bn_lib.c 2018-08-14 10:57:21.592518702 +0200 @@ -290,8 +290,6 @@ static BN_ULONG *bn_expand_internal(cons const BN_ULONG *B; int i; - bn_check_top(b); - if (words > (INT_MAX / (4 * BN_BITS2))) { BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG); return NULL; @@ -425,8 +423,6 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, i BIGNUM *bn_expand2(BIGNUM *b, int words) { - bn_check_top(b); - if (words > b->dmax) { BN_ULONG *a = bn_expand_internal(b, words); if (!a) @@ -460,7 +456,6 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) assert(A == &(b->d[b->dmax])); } #endif - bn_check_top(b); return b; } @@ -572,6 +567,7 @@ void BN_clear(BIGNUM *a) OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0])); a->top = 0; a->neg = 0; + a->flags &= ~BN_FLG_FIXED_TOP; } BN_ULONG BN_get_word(const BIGNUM *a) @@ -592,6 +588,7 @@ int BN_set_word(BIGNUM *a, BN_ULONG w) a->neg = 0; a->d[0] = w; a->top = (w ? 1 : 0); + a->flags &= ~BN_FLG_FIXED_TOP; bn_check_top(a); return (1); } @@ -738,6 +735,7 @@ int BN_set_bit(BIGNUM *a, int n) for (k = a->top; k < i + 1; k++) a->d[k] = 0; a->top = i + 1; + a->flags &= ~BN_FLG_FIXED_TOP; } a->d[i] |= (((BN_ULONG)1) << j); diff -up openssl-1.0.2k/crypto/bn/bn_mod.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_mod.c --- openssl-1.0.2k/crypto/bn/bn_mod.c.rohnp-fix 2017-01-26 14:22:03.000000000 +0100 +++ openssl-1.0.2k/crypto/bn/bn_mod.c 2018-08-14 10:57:21.601518919 +0200 @@ -149,18 +149,73 @@ int BN_mod_add(BIGNUM *r, const BIGNUM * /* * BN_mod_add variant that may be used if both a and b are non-negative and - * less than m + * less than m. The original algorithm was + * + * if (!BN_uadd(r, a, b)) + * return 0; + * if (BN_ucmp(r, m) >= 0) + * return BN_usub(r, r, m); + * + * which is replaced with addition, subtracting modulus, and conditional + * move depending on whether or not subtraction borrowed. */ -int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const BIGNUM *m) +int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m) { - if (!BN_uadd(r, a, b)) + size_t i, ai, bi, mtop = m->top; + BN_ULONG storage[1024 / BN_BITS2]; + BN_ULONG carry, temp, mask, *rp, *tp = storage; + const BN_ULONG *ap, *bp; + + if (bn_wexpand(r, m->top) == NULL) return 0; - if (BN_ucmp(r, m) >= 0) - return BN_usub(r, r, m); + + if (mtop > sizeof(storage) / sizeof(storage[0]) + && (tp = OPENSSL_malloc(mtop * sizeof(BN_ULONG))) == NULL) + return 0; + + ap = a->d != NULL ? a->d : tp; + bp = b->d != NULL ? b->d : tp; + + for (i = 0, ai = 0, bi = 0, carry = 0; i < mtop;) { + mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1)); + temp = ((ap[ai] & mask) + carry) & BN_MASK2; + carry = (temp < carry); + + mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1)); + tp[i] = ((bp[bi] & mask) + temp) & BN_MASK2; + carry += (tp[i] < temp); + + i++; + ai += (i - a->dmax) >> (8 * sizeof(i) - 1); + bi += (i - b->dmax) >> (8 * sizeof(i) - 1); + } + rp = r->d; + carry -= bn_sub_words(rp, tp, m->d, mtop); + for (i = 0; i < mtop; i++) { + rp[i] = (carry & tp[i]) | (~carry & rp[i]); + ((volatile BN_ULONG *)tp)[i] = 0; + } + r->top = mtop; + r->neg = 0; + + if (tp != storage) + OPENSSL_free(tp); + return 1; } +int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m) +{ + int ret = bn_mod_add_fixed_top(r, a, b, m); + + if (ret) + bn_correct_top(r); + + return ret; +} + int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) { diff -up openssl-1.0.2k/crypto/bn/bn_mont.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_mont.c --- openssl-1.0.2k/crypto/bn/bn_mont.c.rohnp-fix 2018-08-14 10:57:21.589518629 +0200 +++ openssl-1.0.2k/crypto/bn/bn_mont.c 2018-08-14 11:15:11.425320301 +0200 @@ -56,7 +56,7 @@ * [including the GNU Public Licence.] */ /* ==================================================================== - * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -123,12 +123,23 @@ #define MONT_WORD /* use the faster word-based algorithm */ #ifdef MONT_WORD -static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); +static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); #endif int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx) { + int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + BN_MONT_CTX *mont, BN_CTX *ctx) +{ BIGNUM *tmp; int ret = 0; #if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) @@ -140,8 +151,8 @@ int BN_mod_mul_montgomery(BIGNUM *r, con if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { r->neg = a->neg ^ b->neg; r->top = num; - bn_correct_top(r); - return (1); + r->flags |= BN_FLG_FIXED_TOP; + return 1; } } #endif @@ -161,13 +172,12 @@ int BN_mod_mul_montgomery(BIGNUM *r, con } /* reduce from aRR to aR */ #ifdef MONT_WORD - if (!BN_from_montgomery_word(r, tmp, mont)) + if (!bn_from_montgomery_word(r, tmp, mont)) goto err; #else if (!BN_from_montgomery(r, tmp, mont, ctx)) goto err; #endif - bn_check_top(r); ret = 1; err: BN_CTX_end(ctx); @@ -175,7 +185,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, con } #ifdef MONT_WORD -static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) +static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; BN_ULONG *ap, *np, *rp, n0, v, carry; @@ -205,28 +215,16 @@ static int BN_from_montgomery_word(BIGNU # endif r->top = max; + r->flags |= BN_FLG_FIXED_TOP; n0 = mont->n0[0]; -# ifdef BN_COUNT - fprintf(stderr, "word BN_from_montgomery_word %d * %d\n", nl, nl); -# endif + /* + * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On + * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r| + * includes |carry| which is stored separately. + */ for (carry = 0, i = 0; i < nl; i++, rp++) { -# ifdef __TANDEM - { - long long t1; - long long t2; - long long t3; - t1 = rp[0] * (n0 & 0177777); - t2 = 037777600000l; - t2 = n0 & t2; - t3 = rp[0] & 0177777; - t2 = (t3 * t2) & BN_MASK2; - t1 = t1 + t2; - v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1); - } -# else v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2); -# endif v = (v + carry + rp[nl]) & BN_MASK2; carry |= (v != rp[nl]); carry &= (v <= rp[nl]); @@ -236,52 +234,27 @@ static int BN_from_montgomery_word(BIGNU if (bn_wexpand(ret, nl) == NULL) return (0); ret->top = nl; + ret->flags |= BN_FLG_FIXED_TOP; ret->neg = r->neg; rp = ret->d; - ap = &(r->d[nl]); -# define BRANCH_FREE 1 -# if BRANCH_FREE - { - BN_ULONG *nrp; - size_t m; + /* + * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap| + * includes |carry| which is stored separately. + */ + ap = &(r->d[nl]); - v = bn_sub_words(rp, ap, np, nl) - carry; - /* - * if subtraction result is real, then trick unconditional memcpy - * below to perform in-place "refresh" instead of actual copy. - */ - m = (0 - (size_t)v); - nrp = - (BN_ULONG *)(((PTR_SIZE_INT) rp & ~m) | ((PTR_SIZE_INT) ap & m)); - - for (i = 0, nl -= 4; i < nl; i += 4) { - BN_ULONG t1, t2, t3, t4; - - t1 = nrp[i + 0]; - t2 = nrp[i + 1]; - t3 = nrp[i + 2]; - ap[i + 0] = 0; - t4 = nrp[i + 3]; - ap[i + 1] = 0; - rp[i + 0] = t1; - ap[i + 2] = 0; - rp[i + 1] = t2; - ap[i + 3] = 0; - rp[i + 2] = t3; - rp[i + 3] = t4; - } - for (nl += 4; i < nl; i++) - rp[i] = nrp[i], ap[i] = 0; + carry -= bn_sub_words(rp, ap, np, nl); + /* + * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note + * |carry| cannot be 1. That would imply the subtraction did not fit in + * |nl| words, and we know at most one subtraction is needed. + */ + for (i = 0; i < nl; i++) { + rp[i] = (carry & ap[i]) | (~carry & rp[i]); + ap[i] = 0; } -# else - if (bn_sub_words(rp, ap, np, nl) - carry) - memcpy(rp, ap, nl * sizeof(BN_ULONG)); -# endif - bn_correct_top(r); - bn_correct_top(ret); - bn_check_top(ret); return (1); } @@ -295,8 +268,11 @@ int BN_from_montgomery(BIGNUM *ret, cons BIGNUM *t; BN_CTX_start(ctx); - if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) - retn = BN_from_montgomery_word(ret, t, mont); + if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) { + retn = bn_from_montgomery_word(ret, t, mont); + bn_correct_top(ret); + bn_check_top(ret); + } BN_CTX_end(ctx); #else /* !MONT_WORD */ BIGNUM *t1, *t2; @@ -334,6 +310,12 @@ int BN_from_montgomery(BIGNUM *ret, cons return (retn); } +int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) +{ + return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx); +} + BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret; @@ -370,7 +352,7 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) { - int ret = 0; + int i, ret = 0; BIGNUM *Ri, *R; if (BN_is_zero(mod)) @@ -382,6 +364,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, c R = &(mont->RR); /* grab RR as a temp */ if (!BN_copy(&(mont->N), mod)) goto err; /* Set N */ + if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) + BN_set_flags(&(mont->N), BN_FLG_CONSTTIME); mont->N.neg = 0; #ifdef MONT_WORD @@ -394,6 +378,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, c tmod.dmax = 2; tmod.neg = 0; + if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) + BN_set_flags(&tmod, BN_FLG_CONSTTIME); + mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; # if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) @@ -496,6 +483,11 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, c if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx)) goto err; + for (i = mont->RR.top, ret = mont->N.top; i < ret; i++) + mont->RR.d[i] = 0; + mont->RR.top = ret; + mont->RR.flags |= BN_FLG_FIXED_TOP; + ret = 1; err: BN_CTX_end(ctx); diff -up openssl-1.0.2k/crypto/bn/bn_sqr.c.rohnp-fix openssl-1.0.2k/crypto/bn/bn_sqr.c --- openssl-1.0.2k/crypto/bn/bn_sqr.c.rohnp-fix 2017-01-26 14:22:03.000000000 +0100 +++ openssl-1.0.2k/crypto/bn/bn_sqr.c 2018-08-14 10:57:21.593518726 +0200 @@ -135,14 +135,8 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, B } rr->neg = 0; - /* - * If the most-significant half of the top word of 'a' is zero, then the - * square of 'a' will max-1 words. - */ - if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) - rr->top = max - 1; - else - rr->top = max; + rr->top = max; + bn_correct_top(rr); if (r != rr && BN_copy(r, rr) == NULL) goto err; diff -up openssl-1.0.2k/crypto/bn_int.h.rohnp-fix openssl-1.0.2k/crypto/bn_int.h --- openssl-1.0.2k/crypto/bn_int.h.rohnp-fix 2018-08-14 10:57:21.597518822 +0200 +++ openssl-1.0.2k/crypto/bn_int.h 2018-08-14 10:57:21.599518871 +0200 @@ -0,0 +1,13 @@ +/* + * Some BIGNUM functions assume most significant limb to be non-zero, which + * is customarily arranged by bn_correct_top. Output from below functions + * is not processed with bn_correct_top, and for this reason it may not be + * returned out of public API. It may only be passed internally into other + * functions known to support non-minimal or zero-padded BIGNUMs. + */ +int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + BN_MONT_CTX *mont, BN_CTX *ctx); +int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx); +int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m); diff -up openssl-1.0.2k/crypto/dsa/dsa_ossl.c.rohnp-fix openssl-1.0.2k/crypto/dsa/dsa_ossl.c --- openssl-1.0.2k/crypto/dsa/dsa_ossl.c.rohnp-fix 2018-06-20 17:44:02.153396702 +0200 +++ openssl-1.0.2k/crypto/dsa/dsa_ossl.c 2018-06-20 17:44:02.577406741 +0200 @@ -136,8 +136,7 @@ const DSA_METHOD *DSA_OpenSSL(void) static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) { BIGNUM *kinv = NULL, *r = NULL, *s = NULL; - BIGNUM m; - BIGNUM xr; + BIGNUM *m, *blind, *blindm, *tmp; BN_CTX *ctx = NULL; int reason = ERR_R_BN_LIB; DSA_SIG *ret = NULL; @@ -156,9 +155,6 @@ static DSA_SIG *dsa_do_sign(const unsign } #endif - BN_init(&m); - BN_init(&xr); - if (!dsa->p || !dsa->q || !dsa->g) { reason = DSA_R_MISSING_PARAMETERS; goto err; @@ -170,6 +166,14 @@ static DSA_SIG *dsa_do_sign(const unsign ctx = BN_CTX_new(); if (ctx == NULL) goto err; + BN_CTX_start(ctx); + m = BN_CTX_get(ctx); + blind = BN_CTX_get(ctx); + blindm = BN_CTX_get(ctx); + tmp = BN_CTX_get(ctx); + if (tmp == NULL) + goto err; + redo: if ((dsa->kinv == NULL) || (dsa->r == NULL)) { if (!DSA_sign_setup(dsa, ctx, &kinv, &r)) @@ -189,20 +193,52 @@ static DSA_SIG *dsa_do_sign(const unsign * 4.2 */ dlen = BN_num_bytes(dsa->q); - if (BN_bin2bn(dgst, dlen, &m) == NULL) + if (BN_bin2bn(dgst, dlen, m) == NULL) goto err; - /* Compute s = inv(k) (m + xr) mod q */ - if (!BN_mod_mul(&xr, dsa->priv_key, r, dsa->q, ctx)) - goto err; /* s = xr */ - if (!BN_add(s, &xr, &m)) - goto err; /* s = m + xr */ - if (BN_cmp(s, dsa->q) > 0) - if (!BN_sub(s, s, dsa->q)) + /* + * The normal signature calculation is: + * + * s := k^-1 * (m + r * priv_key) mod q + * + * We will blind this to protect against side channel attacks + * + * s := blind^-1 * k^-1 * (blind * m + blind * r * priv_key) mod q + */ + + /* Generate a blinding value */ + do { + if (!BN_rand(blind, BN_num_bits(dsa->q) - 1, -1, 0)) goto err; + } while (BN_is_zero(blind)); + BN_set_flags(blind, BN_FLG_CONSTTIME); + BN_set_flags(blindm, BN_FLG_CONSTTIME); + BN_set_flags(tmp, BN_FLG_CONSTTIME); + + /* tmp := blind * priv_key * r mod q */ + if (!BN_mod_mul(tmp, blind, dsa->priv_key, dsa->q, ctx)) + goto err; + if (!BN_mod_mul(tmp, tmp, r, dsa->q, ctx)) + goto err; + + /* blindm := blind * m mod q */ + if (!BN_mod_mul(blindm, blind, m, dsa->q, ctx)) + goto err; + + /* s : = (blind * priv_key * r) + (blind * m) mod q */ + if (!BN_mod_add_quick(s, tmp, blindm, dsa->q)) + goto err; + + /* s := s * k^-1 mod q */ if (!BN_mod_mul(s, s, kinv, dsa->q, ctx)) goto err; + /* s:= s * blind^-1 mod q */ + if (BN_mod_inverse(blind, blind, dsa->q, ctx) == NULL) + goto err; + if (!BN_mod_mul(s, s, blind, dsa->q, ctx)) + goto err; + /* * Redo if r or s is zero as required by FIPS 186-3: this is very * unlikely. @@ -226,13 +262,12 @@ static DSA_SIG *dsa_do_sign(const unsign BN_free(r); BN_free(s); } - if (ctx != NULL) + if (ctx != NULL) { + BN_CTX_end(ctx); BN_CTX_free(ctx); - BN_clear_free(&m); - BN_clear_free(&xr); - if (kinv != NULL) /* dsa->kinv is NULL now if we used it */ - BN_clear_free(kinv); - return (ret); + } + BN_clear_free(kinv); + return ret; } static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in, BIGNUM **kinvp, diff -up openssl-1.0.2k/crypto/ecdsa/ecs_ossl.c.rohnp-fix openssl-1.0.2k/crypto/ecdsa/ecs_ossl.c --- openssl-1.0.2k/crypto/ecdsa/ecs_ossl.c.rohnp-fix 2018-06-20 17:44:02.205397934 +0200 +++ openssl-1.0.2k/crypto/ecdsa/ecs_ossl.c 2018-08-14 11:18:02.062439755 +0200 @@ -63,6 +63,7 @@ #ifdef OPENSSL_FIPS # include #endif +#include "bn_int.h" static ECDSA_SIG *ecdsa_do_sign(const unsigned char *dgst, int dlen, const BIGNUM *, const BIGNUM *, @@ -98,6 +99,7 @@ static int ecdsa_sign_setup(EC_KEY *ecke EC_POINT *tmp_point = NULL; const EC_GROUP *group; int ret = 0; + int order_bits; if (eckey == NULL || (group = EC_KEY_get0_group(eckey)) == NULL) { ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_PASSED_NULL_PARAMETER); @@ -129,6 +131,13 @@ static int ecdsa_sign_setup(EC_KEY *ecke goto err; } + /* Preallocate space */ + order_bits = BN_num_bits(order); + if (!BN_set_bit(k, order_bits) + || !BN_set_bit(r, order_bits) + || !BN_set_bit(X, order_bits)) + goto err; + do { /* get random k */ do @@ -142,13 +151,19 @@ static int ecdsa_sign_setup(EC_KEY *ecke /* * We do not want timing information to leak the length of k, so we * compute G*k using an equivalent scalar of fixed bit-length. + * + * We unconditionally perform both of these additions to prevent a + * small timing information leakage. We then choose the sum that is + * one bit longer than the order. This guarantees the code + * path used in the constant time implementations elsewhere. + * + * TODO: revisit the BN_copy aiming for a memory access agnostic + * conditional copy. */ - - if (!BN_add(k, k, order)) + if (!BN_add(r, k, order) + || !BN_add(X, r, order) + || !BN_copy(k, BN_num_bits(r) > order_bits ? r : X)) goto err; - if (BN_num_bits(k) <= BN_num_bits(order)) - if (!BN_add(k, k, order)) - goto err; /* compute r the x-coordinate of generator * k */ if (!EC_POINT_mul(group, tmp_point, k, NULL, NULL, ctx)) { @@ -240,13 +255,14 @@ static ECDSA_SIG *ecdsa_do_sign(const un EC_KEY *eckey) { int ok = 0, i; - BIGNUM *kinv = NULL, *s, *m = NULL, *tmp = NULL, *order = NULL; + BIGNUM *kinv = NULL, *s, *m = NULL, *order = NULL; const BIGNUM *ckinv; BN_CTX *ctx = NULL; const EC_GROUP *group; ECDSA_SIG *ret; ECDSA_DATA *ecdsa; const BIGNUM *priv_key; + BN_MONT_CTX *mont_data; #ifdef OPENSSL_FIPS if (FIPS_selftest_failed()) { @@ -272,7 +288,7 @@ static ECDSA_SIG *ecdsa_do_sign(const un s = ret->s; if ((ctx = BN_CTX_new()) == NULL || (order = BN_new()) == NULL || - (tmp = BN_new()) == NULL || (m = BN_new()) == NULL) { + (m = BN_new()) == NULL) { ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_MALLOC_FAILURE); goto err; } @@ -281,6 +297,8 @@ static ECDSA_SIG *ecdsa_do_sign(const un ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_EC_LIB); goto err; } + mont_data = EC_GROUP_get_mont_data(group); + i = BN_num_bits(order); /* * Need to truncate digest if it is too long: first truncate whole bytes. @@ -311,21 +329,33 @@ static ECDSA_SIG *ecdsa_do_sign(const un } } - if (!BN_mod_mul(tmp, priv_key, ret->r, order, ctx)) { - ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_BN_LIB); + /* + * With only one multiplicant being in Montgomery domain + * multiplication yields real result without post-conversion. + * Also note that all operations but last are performed with + * zero-padded vectors. Last operation, BN_mod_mul_montgomery + * below, returns user-visible value with removed zero padding. + */ + if (!bn_to_mont_fixed_top(s, ret->r, mont_data, ctx) + || !bn_mul_mont_fixed_top(s, s, priv_key, mont_data, ctx)) { goto err; } - if (!BN_mod_add_quick(s, tmp, m, order)) { + if (!bn_mod_add_fixed_top(s, s, m, order)) { ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_BN_LIB); goto err; } - if (!BN_mod_mul(s, s, ckinv, order, ctx)) { + /* + * |s| can still be larger than modulus, because |m| can be. In + * such case we count on Montgomery reduction to tie it up. + */ + if (!bn_to_mont_fixed_top(s, s, mont_data, ctx) + || !BN_mod_mul_montgomery(s, s, ckinv, mont_data, ctx)) { ECDSAerr(ECDSA_F_ECDSA_DO_SIGN, ERR_R_BN_LIB); goto err; } if (BN_is_zero(s)) { /* - * if kinv and r have been supplied by the caller don't to + * if kinv and r have been supplied by the caller don't * generate new kinv and r values */ if (in_kinv != NULL && in_r != NULL) { @@ -349,8 +379,6 @@ static ECDSA_SIG *ecdsa_do_sign(const un BN_CTX_free(ctx); if (m) BN_clear_free(m); - if (tmp) - BN_clear_free(tmp); if (order) BN_free(order); if (kinv) diff -up openssl-1.0.2k/crypto/Makefile.rohnp-fix openssl-1.0.2k/crypto/Makefile --- openssl-1.0.2k/crypto/Makefile.rohnp-fix 2018-06-20 17:44:02.467404137 +0200 +++ openssl-1.0.2k/crypto/Makefile 2018-08-14 10:57:21.595518774 +0200 @@ -45,7 +45,7 @@ SRC= $(LIBSRC) EXHEADER= crypto.h opensslv.h opensslconf.h ebcdic.h symhacks.h \ ossl_typ.h HEADER= cryptlib.h buildinf.h md32_common.h o_time.h o_str.h o_dir.h \ - constant_time_locl.h $(EXHEADER) + constant_time_locl.h bn_int.h $(EXHEADER) ALL= $(GENERAL) $(SRC) $(HEADER)