diff -up openssl-1.0.2k/crypto/bn/bn_exp.c.one-and-done openssl-1.0.2k/crypto/bn/bn_exp.c --- openssl-1.0.2k/crypto/bn/bn_exp.c.one-and-done 2019-04-04 16:46:21.287257363 +0200 +++ openssl-1.0.2k/crypto/bn/bn_exp.c 2019-04-04 16:45:32.875130057 +0200 @@ -579,7 +579,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI return (ret); } -#if defined(SPARC_T4_MONT) static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { BN_ULONG ret = 0; @@ -598,7 +597,6 @@ static BN_ULONG bn_get_bits(const BIGNUM return ret & BN_MASK2; } -#endif /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific @@ -697,7 +695,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i, bits, ret = 0, window, wvalue; + int i, bits, ret = 0, window, wvalue, wmask, window0; int top; BN_MONT_CTX *mont = NULL; @@ -945,20 +943,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr top /= 2; bn_flip_t4(np, mont->N.d, top); - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5_t4(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { + while (bits > 0) { if (bits < stride) - stride = bits + 1; + stride = bits; bits -= stride; - wvalue = bn_get_bits(p, bits + 1); + wvalue = bn_get_bits(p, bits); if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) continue; @@ -1066,32 +1071,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr bn_scatter5(tmp.d, top, powerbuf, i); } # endif - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - if (top & 7) - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - + if (top & 7) { + while (bits > 0) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, - wvalue); + bn_get_bits5(p->d, bits -= 5)); + } } else { - while (bits >= 0) { - wvalue = bn_get_bits5(p->d, bits - 4); - bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + while (bits > 0) { + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, + bn_get_bits5(p->d, bits -= 5)); } } @@ -1133,28 +1142,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr } } - bits--; - for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % window + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, window)) goto err; + wmask = (1 << window) - 1; /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - wvalue = 0; /* The 'value' of the window */ + while (bits > 0) { - /* Scan the window, squaring the result as we go */ - for (i = 0; i < window; i++, bits--) { + /* Square the result window-size times */ + for (i = 0; i < window; i++) if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx)) goto err; - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - } /* + * Get a window's worth of bits from the exponent + * This avoids calling BN_is_bit_set for each bit, which + * is not only slower but also makes each bit vulnerable to + * EM (and likely other) side-channel attacks like One&Done + * (for details see "One&Done: A Single-Decryption EM-Based + * Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam, + * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and + * M. Prvulovic, in USENIX Security'18) + */ + bits -= window; + wvalue = bn_get_bits(p, bits) & wmask; + /* * Fetch the appropriate pre-computed value from the pre-buf */ if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,