Blob Blame Raw
diff -up openssl-1.0.2k/crypto/bn/bn_blind.c.9-lives openssl-1.0.2k/crypto/bn/bn_blind.c
--- openssl-1.0.2k/crypto/bn/bn_blind.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/bn/bn_blind.c	2019-04-05 10:50:56.136104388 +0200
@@ -206,10 +206,15 @@ int BN_BLINDING_update(BN_BLINDING *b, B
         if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
             goto err;
     } else if (!(b->flags & BN_BLINDING_NO_UPDATE)) {
-        if (!BN_mod_mul(b->A, b->A, b->A, b->mod, ctx))
-            goto err;
-        if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx))
-            goto err;
+        if (b->m_ctx != NULL) {
+            if (!bn_mul_mont_fixed_top(b->Ai, b->Ai, b->Ai, b->m_ctx, ctx)
+                || !bn_mul_mont_fixed_top(b->A, b->A, b->A, b->m_ctx, ctx))
+                goto err;
+        } else {
+            if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx)
+                || !BN_mod_mul(b->A, b->A, b->A, b->mod, ctx))
+                goto err;
+        }
     }
 
     ret = 1;
@@ -241,13 +246,13 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BI
     else if (!BN_BLINDING_update(b, ctx))
         return (0);
 
-    if (r != NULL) {
-        if (!BN_copy(r, b->Ai))
-            ret = 0;
-    }
+    if (r != NULL && (BN_copy(r, b->Ai) == NULL))
+        return 0;
 
-    if (!BN_mod_mul(n, n, b->A, b->mod, ctx))
-        ret = 0;
+    if (b->m_ctx != NULL)
+        ret = BN_mod_mul_montgomery(n, n, b->A, b->m_ctx, ctx);
+    else
+        ret = BN_mod_mul(n, n, b->A, b->mod, ctx);
 
     return ret;
 }
@@ -264,14 +269,29 @@ int BN_BLINDING_invert_ex(BIGNUM *n, con
 
     bn_check_top(n);
 
-    if (r != NULL)
-        ret = BN_mod_mul(n, n, r, b->mod, ctx);
-    else {
-        if (b->Ai == NULL) {
-            BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED);
-            return (0);
+    if (r == NULL && (r = b->Ai) == NULL) {
+        BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED);
+        return 0;
+    }
+
+    if (b->m_ctx != NULL) {
+        /* ensure that BN_mod_mul_montgomery takes pre-defined path */
+        if (n->dmax >= r->top) {
+            size_t i, rtop = r->top, ntop = n->top;
+            BN_ULONG mask;
+
+            for (i = 0; i < rtop; i++) {
+                mask = (BN_ULONG)0 - ((i - ntop) >> (8 * sizeof(i) - 1));
+                n->d[i] &= mask;
+            }
+            mask = (BN_ULONG)0 - ((rtop - ntop) >> (8 * sizeof(ntop) - 1));
+            /* always true, if (rtop >= ntop) n->top = r->top; */
+            n->top = (int)(rtop & ~mask) | (ntop & mask);
+            n->flags |= (BN_FLG_FIXED_TOP & ~mask);
         }
-        ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx);
+        ret = BN_mod_mul_montgomery(n, n, r, b->m_ctx, ctx);
+    } else {
+        ret = BN_mod_mul(n, n, r, b->mod, ctx);
     }
 
     bn_check_top(n);
@@ -366,14 +386,19 @@ BN_BLINDING *BN_BLINDING_create_param(BN
     } while (1);
 
     if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) {
-        if (!ret->bn_mod_exp
-            (ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
+        if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
             goto err;
     } else {
         if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
             goto err;
     }
 
+    if (ret->m_ctx != NULL) {
+        if (!bn_to_mont_fixed_top(ret->Ai, ret->Ai, ret->m_ctx, ctx)
+            || !bn_to_mont_fixed_top(ret->A, ret->A, ret->m_ctx, ctx))
+            goto err;
+    }
+
     return ret;
  err:
     if (b == NULL && ret != NULL) {
diff -up openssl-1.0.2k/crypto/bn/bn_lib.c.9-lives openssl-1.0.2k/crypto/bn/bn_lib.c
--- openssl-1.0.2k/crypto/bn/bn_lib.c.9-lives	2019-04-05 10:50:56.128104529 +0200
+++ openssl-1.0.2k/crypto/bn/bn_lib.c	2019-04-05 10:50:56.136104388 +0200
@@ -144,74 +144,47 @@ const BIGNUM *BN_value_one(void)
 
 int BN_num_bits_word(BN_ULONG l)
 {
-    static const unsigned char bits[256] = {
-        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    };
-
-#if defined(SIXTY_FOUR_BIT_LONG)
-    if (l & 0xffffffff00000000L) {
-        if (l & 0xffff000000000000L) {
-            if (l & 0xff00000000000000L) {
-                return (bits[(int)(l >> 56)] + 56);
-            } else
-                return (bits[(int)(l >> 48)] + 48);
-        } else {
-            if (l & 0x0000ff0000000000L) {
-                return (bits[(int)(l >> 40)] + 40);
-            } else
-                return (bits[(int)(l >> 32)] + 32);
-        }
-    } else
-#else
-# ifdef SIXTY_FOUR_BIT
-    if (l & 0xffffffff00000000LL) {
-        if (l & 0xffff000000000000LL) {
-            if (l & 0xff00000000000000LL) {
-                return (bits[(int)(l >> 56)] + 56);
-            } else
-                return (bits[(int)(l >> 48)] + 48);
-        } else {
-            if (l & 0x0000ff0000000000LL) {
-                return (bits[(int)(l >> 40)] + 40);
-            } else
-                return (bits[(int)(l >> 32)] + 32);
-        }
-    } else
-# endif
-#endif
-    {
-#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
-        if (l & 0xffff0000L) {
-            if (l & 0xff000000L)
-                return (bits[(int)(l >> 24L)] + 24);
-            else
-                return (bits[(int)(l >> 16L)] + 16);
-        } else
-#endif
-        {
-#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
-            if (l & 0xff00L)
-                return (bits[(int)(l >> 8)] + 8);
-            else
+    BN_ULONG x, mask;
+    int bits = (l != 0);
+
+#if BN_BITS2 > 32
+    x = l >> 32;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 32 & mask;
+    l ^= (x ^ l) & mask;
 #endif
-                return (bits[(int)(l)]);
-        }
-    }
+
+    x = l >> 16;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 16 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 8;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 8 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 4;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 4 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 2;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 2 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 1;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 1 & mask;
+
+    return bits;
 }
 
 int BN_num_bits(const BIGNUM *a)
@@ -519,12 +492,18 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM
     memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
 #endif
 
-    a->top = b->top;
     a->neg = b->neg;
+    a->top = b->top;
+    a->flags |= b->flags & BN_FLG_FIXED_TOP;
     bn_check_top(a);
     return (a);
 }
 
+#define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \
+                                    | BN_FLG_CONSTTIME   \
+                                    | BN_FLG_FIXED_TOP))
+#define FLAGS_STRUCT(flags) ((flags) & (BN_FLG_MALLOCED))
+
 void BN_swap(BIGNUM *a, BIGNUM *b)
 {
     int flags_old_a, flags_old_b;
@@ -552,10 +531,8 @@ void BN_swap(BIGNUM *a, BIGNUM *b)
     b->dmax = tmp_dmax;
     b->neg = tmp_neg;
 
-    a->flags =
-        (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
-    b->flags =
-        (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
+    a->flags = FLAGS_STRUCT(flags_old_a) | FLAGS_DATA(flags_old_b);
+    b->flags = FLAGS_STRUCT(flags_old_b) | FLAGS_DATA(flags_old_a);
     bn_check_top(a);
     bn_check_top(b);
 }
@@ -637,6 +614,55 @@ BIGNUM *BN_bin2bn(const unsigned char *s
 }
 
 /* ignore negative */
+static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
+{
+    int n;
+    size_t i, lasti, j, atop, mask;
+    BN_ULONG l;
+
+    /*
+     * In case |a| is fixed-top, BN_num_bytes can return bogus length,
+     * but it's assumed that fixed-top inputs ought to be "nominated"
+     * even for padded output, so it works out...
+     */
+    n = BN_num_bytes(a);
+    if (tolen == -1) {
+        tolen = n;
+    } else if (tolen < n) {     /* uncommon/unlike case */
+        BIGNUM temp = *a;
+
+        bn_correct_top(&temp);
+        n = BN_num_bytes(&temp);
+        if (tolen < n)
+            return -1;
+    }
+
+    /* Swipe through whole available data and don't give away padded zero. */
+    atop = a->dmax * BN_BYTES;
+    if (atop == 0) {
+        OPENSSL_cleanse(to, tolen);
+        return tolen;
+    }
+
+    lasti = atop - 1;
+    atop = a->top * BN_BYTES;
+    for (i = 0, j = 0, to += tolen; j < (size_t)tolen; j++) {
+        l = a->d[i / BN_BYTES];
+        mask = 0 - ((j - atop) >> (8 * sizeof(i) - 1));
+        *--to = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask);
+        i += (i - lasti) >> (8 * sizeof(i) - 1); /* stay on last limb */
+    }
+
+    return tolen;
+}
+
+int bn_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
+{
+    if (tolen < 0)
+        return -1;
+    return bn2binpad(a, to, tolen);
+}
+
 int BN_bn2bin(const BIGNUM *a, unsigned char *to)
 {
     int n, i;
@@ -810,6 +836,9 @@ int bn_cmp_words(const BN_ULONG *a, cons
     int i;
     BN_ULONG aa, bb;
 
+    if (n == 0)
+        return 0;
+
     aa = a[n - 1];
     bb = b[n - 1];
     if (aa != bb)
diff -up openssl-1.0.2k/crypto/bn/bn_mod.c.9-lives openssl-1.0.2k/crypto/bn/bn_mod.c
--- openssl-1.0.2k/crypto/bn/bn_mod.c.9-lives	2019-04-05 10:50:56.125104581 +0200
+++ openssl-1.0.2k/crypto/bn/bn_mod.c	2019-04-05 10:50:56.136104388 +0200
@@ -197,6 +197,7 @@ int bn_mod_add_fixed_top(BIGNUM *r, cons
         ((volatile BN_ULONG *)tp)[i] = 0;
     }
     r->top = mtop;
+    r->flags |= BN_FLG_FIXED_TOP;
     r->neg = 0;
 
     if (tp != storage)
@@ -225,6 +226,70 @@ int BN_mod_sub(BIGNUM *r, const BIGNUM *
 }
 
 /*
+ * BN_mod_sub variant that may be used if both a and b are non-negative,
+ * a is less than m, while b is of same bit width as m. It's implemented
+ * as subtraction followed by two conditional additions.
+ *
+ * 0 <= a < m
+ * 0 <= b < 2^w < 2*m
+ *
+ * after subtraction
+ *
+ * -2*m < r = a - b < m
+ *
+ * Thus it takes up to two conditional additions to make |r| positive.
+ */
+int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                         const BIGNUM *m)
+{
+    size_t i, ai, bi, mtop = m->top;
+    BN_ULONG borrow, carry, ta, tb, mask, *rp;
+    const BN_ULONG *ap, *bp;
+
+    if (bn_wexpand(r, m->top) == NULL)
+        return 0;
+
+    rp = r->d;
+    ap = a->d != NULL ? a->d : rp;
+    bp = b->d != NULL ? b->d : rp;
+
+    for (i = 0, ai = 0, bi = 0, borrow = 0; i < mtop;) {
+        mask = (BN_ULONG)0 - ((i - a->top) >> (8 * sizeof(i) - 1));
+        ta = ap[ai] & mask;
+
+        mask = (BN_ULONG)0 - ((i - b->top) >> (8 * sizeof(i) - 1));
+        tb = bp[bi] & mask;
+        rp[i] = ta - tb - borrow;
+        if (ta != tb)
+            borrow = (ta < tb);
+
+        i++;
+        ai += (i - a->dmax) >> (8 * sizeof(i) - 1);
+        bi += (i - b->dmax) >> (8 * sizeof(i) - 1);
+    }
+    ap = m->d;
+    for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+        ta = ((ap[i] & mask) + carry) & BN_MASK2;
+        carry = (ta < carry);
+        rp[i] = (rp[i] + ta) & BN_MASK2;
+        carry += (rp[i] < ta);
+    }
+    borrow -= carry;
+    for (i = 0, mask = 0 - borrow, carry = 0; i < mtop; i++) {
+        ta = ((ap[i] & mask) + carry) & BN_MASK2;
+        carry = (ta < carry);
+        rp[i] = (rp[i] + ta) & BN_MASK2;
+        carry += (rp[i] < ta);
+    }
+
+    r->top = mtop;
+    r->flags |= BN_FLG_FIXED_TOP;
+    r->neg = 0;
+
+    return 1;
+}
+
+/*
  * BN_mod_sub variant that may be used if both a and b are non-negative and
  * less than m
  */
diff -up openssl-1.0.2k/crypto/bn/bn_mont.c.9-lives openssl-1.0.2k/crypto/bn/bn_mont.c
--- openssl-1.0.2k/crypto/bn/bn_mont.c.9-lives	2019-04-05 10:50:56.125104581 +0200
+++ openssl-1.0.2k/crypto/bn/bn_mont.c	2019-04-05 10:50:56.137104370 +0200
@@ -164,10 +164,10 @@ int bn_mul_mont_fixed_top(BIGNUM *r, con
 
     bn_check_top(tmp);
     if (a == b) {
-        if (!BN_sqr(tmp, a, ctx))
+        if (!bn_sqr_fixed_top(tmp, a, ctx))
             goto err;
     } else {
-        if (!BN_mul(tmp, a, b, ctx))
+        if (!bn_mul_fixed_top(tmp, a, b, ctx))
             goto err;
     }
     /* reduce from aRR to aR */
@@ -190,6 +190,7 @@ static int bn_from_montgomery_word(BIGNU
     BIGNUM *n;
     BN_ULONG *ap, *np, *rp, n0, v, carry;
     int nl, max, i;
+    unsigned int rtop;
 
     n = &(mont->N);
     nl = n->top;
@@ -207,12 +208,10 @@ static int bn_from_montgomery_word(BIGNU
     rp = r->d;
 
     /* clear the top words of T */
-# if 1
-    for (i = r->top; i < max; i++) /* memset? XXX */
-        rp[i] = 0;
-# else
-    memset(&(rp[r->top]), 0, (max - r->top) * sizeof(BN_ULONG));
-# endif
+    for (rtop = r->top, i = 0; i < max; i++) {
+        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
+        rp[i] &= v;
+    }
 
     r->top = max;
     r->flags |= BN_FLG_FIXED_TOP;
@@ -263,6 +262,18 @@ static int bn_from_montgomery_word(BIGNU
 int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
                        BN_CTX *ctx)
 {
+    int retn;
+
+    retn = bn_from_mont_fixed_top(ret, a, mont, ctx);
+    bn_correct_top(ret);
+    bn_check_top(ret);
+
+    return retn;
+}
+
+int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+                           BN_CTX *ctx)
+{
     int retn = 0;
 #ifdef MONT_WORD
     BIGNUM *t;
@@ -270,8 +281,6 @@ int BN_from_montgomery(BIGNUM *ret, cons
     BN_CTX_start(ctx);
     if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
         retn = bn_from_montgomery_word(ret, t, mont);
-        bn_correct_top(ret);
-        bn_check_top(ret);
     }
     BN_CTX_end(ctx);
 #else                           /* !MONT_WORD */
diff -up openssl-1.0.2k/crypto/bn/bn_mul.c.9-lives openssl-1.0.2k/crypto/bn/bn_mul.c
--- openssl-1.0.2k/crypto/bn/bn_mul.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/bn/bn_mul.c	2019-04-05 10:50:56.137104370 +0200
@@ -936,6 +936,16 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *
 
 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
 {
+    int ret = bn_mul_fixed_top(r, a, b, ctx);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
     int ret = 0;
     int top, al, bl;
     BIGNUM *rr;
@@ -1032,46 +1042,6 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, c
             rr->top = top;
             goto end;
         }
-# if 0
-        if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
-            BIGNUM *tmp_bn = (BIGNUM *)b;
-            if (bn_wexpand(tmp_bn, al) == NULL)
-                goto err;
-            tmp_bn->d[bl] = 0;
-            bl++;
-            i--;
-        } else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
-            BIGNUM *tmp_bn = (BIGNUM *)a;
-            if (bn_wexpand(tmp_bn, bl) == NULL)
-                goto err;
-            tmp_bn->d[al] = 0;
-            al++;
-            i++;
-        }
-        if (i == 0) {
-            /* symmetric and > 4 */
-            /* 16 or larger */
-            j = BN_num_bits_word((BN_ULONG)al);
-            j = 1 << (j - 1);
-            k = j + j;
-            t = BN_CTX_get(ctx);
-            if (al == j) {      /* exact multiple */
-                if (bn_wexpand(t, k * 2) == NULL)
-                    goto err;
-                if (bn_wexpand(rr, k * 2) == NULL)
-                    goto err;
-                bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
-            } else {
-                if (bn_wexpand(t, k * 4) == NULL)
-                    goto err;
-                if (bn_wexpand(rr, k * 4) == NULL)
-                    goto err;
-                bn_mul_part_recursive(rr->d, a->d, b->d, al - j, j, t->d);
-            }
-            rr->top = top;
-            goto end;
-        }
-# endif
     }
 #endif                          /* BN_RECURSION */
     if (bn_wexpand(rr, top) == NULL)
@@ -1082,7 +1052,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, c
 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
  end:
 #endif
-    bn_correct_top(rr);
+    rr->flags |= BN_FLG_FIXED_TOP;
     if (r != rr && BN_copy(r, rr) == NULL)
         goto err;
 
diff -up openssl-1.0.2k/crypto/bn/bn_sqr.c.9-lives openssl-1.0.2k/crypto/bn/bn_sqr.c
--- openssl-1.0.2k/crypto/bn/bn_sqr.c.9-lives	2019-04-05 10:50:56.125104581 +0200
+++ openssl-1.0.2k/crypto/bn/bn_sqr.c	2019-04-05 10:50:56.137104370 +0200
@@ -66,6 +66,16 @@
  */
 int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
 {
+    int ret = bn_sqr_fixed_top(r, a, ctx);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
+{
     int max, al;
     int ret = 0;
     BIGNUM *tmp, *rr;
@@ -136,7 +146,7 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, B
 
     rr->neg = 0;
     rr->top = max;
-    bn_correct_top(rr);
+    rr->flags |= BN_FLG_FIXED_TOP;
     if (r != rr && BN_copy(r, rr) == NULL)
         goto err;
 
diff -up openssl-1.0.2k/crypto/bn_int.h.9-lives openssl-1.0.2k/crypto/bn_int.h
--- openssl-1.0.2k/crypto/bn_int.h.9-lives	2019-04-05 10:50:56.125104581 +0200
+++ openssl-1.0.2k/crypto/bn_int.h	2019-04-05 10:50:56.137104370 +0200
@@ -7,7 +7,15 @@
  */
 int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                           BN_MONT_CTX *mont, BN_CTX *ctx);
+int bn_from_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
+                           BN_CTX *ctx);
 int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
                          BN_CTX *ctx);
 int bn_mod_add_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          const BIGNUM *m);
+int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                         const BIGNUM *m);
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
+
+int bn_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen);
diff -up openssl-1.0.2k/crypto/constant_time_locl.h.9-lives openssl-1.0.2k/crypto/constant_time_locl.h
--- openssl-1.0.2k/crypto/constant_time_locl.h.9-lives	2019-04-05 10:50:55.545114779 +0200
+++ openssl-1.0.2k/crypto/constant_time_locl.h	2019-04-05 10:50:56.137104370 +0200
@@ -204,6 +204,12 @@ static inline int constant_time_select_i
     return (int)(constant_time_select(mask, (unsigned)(a), (unsigned)(b)));
 }
 
+/*
+ * Expected usage pattern is to unconditionally set error and then
+ * wipe it if there was no actual error. |clear| is 1 or 0.
+ */
+void err_clear_last_constant_time(int clear);
+
 #ifdef __cplusplus
 }
 #endif
diff -up openssl-1.0.2k/crypto/err/err.c.9-lives openssl-1.0.2k/crypto/err/err.c
--- openssl-1.0.2k/crypto/err/err.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/err/err.c	2019-04-05 10:50:56.138104353 +0200
@@ -118,6 +118,7 @@
 #include <openssl/buffer.h>
 #include <openssl/bio.h>
 #include <openssl/err.h>
+#include "constant_time_locl.h"
 
 DECLARE_LHASH_OF(ERR_STRING_DATA);
 DECLARE_LHASH_OF(ERR_STATE);
@@ -819,8 +820,24 @@ static unsigned long get_error_values(in
         return ERR_R_INTERNAL_ERROR;
     }
 
+    while (es->bottom != es->top) {
+        if (es->err_flags[es->top] & ERR_FLAG_CLEAR) {
+            err_clear(es, es->top);
+            es->top = es->top > 0 ? es->top - 1 : ERR_NUM_ERRORS - 1;
+            continue;
+        }
+        i = (es->bottom + 1) % ERR_NUM_ERRORS;
+        if (es->err_flags[i] & ERR_FLAG_CLEAR) {
+            es->bottom = i;
+            err_clear(es, es->bottom);
+            continue;
+        }
+        break;
+    }
+
     if (es->bottom == es->top)
         return 0;
+
     if (top)
         i = es->top;            /* last error */
     else
@@ -1146,3 +1163,23 @@ int ERR_pop_to_mark(void)
     es->err_flags[es->top] &= ~ERR_FLAG_MARK;
     return 1;
 }
+
+void err_clear_last_constant_time(int clear)
+{
+    ERR_STATE *es;
+    int top;
+
+    es = ERR_get_state();
+    if (es == NULL)
+        return;
+
+    top = es->top;
+
+    /*
+     * Flag error as cleared but remove it elsewhere to avoid two errors
+     * accessing the same error stack location, revealing timing information.
+     */
+    clear = constant_time_select_int(constant_time_eq_int(clear, 0),
+                                     0, ERR_FLAG_CLEAR);
+    es->err_flags[top] |= clear;
+}
diff -up openssl-1.0.2k/crypto/err/err.h.9-lives openssl-1.0.2k/crypto/err/err.h
--- openssl-1.0.2k/crypto/err/err.h.9-lives	2019-04-05 10:50:55.450116449 +0200
+++ openssl-1.0.2k/crypto/err/err.h	2019-04-05 11:14:57.689757981 +0200
@@ -143,6 +143,7 @@ extern "C" {
 # define ERR_TXT_STRING          0x02
 
 # define ERR_FLAG_MARK           0x01
+# define ERR_FLAG_CLEAR          0x02
 
 # define ERR_NUM_ERRORS  16
 typedef struct err_state_st {
diff -up openssl-1.0.2k/crypto/rsa/rsa_eay.c.9-lives openssl-1.0.2k/crypto/rsa/rsa_eay.c
--- openssl-1.0.2k/crypto/rsa/rsa_eay.c.9-lives	2019-04-05 10:50:55.998106814 +0200
+++ openssl-1.0.2k/crypto/rsa/rsa_eay.c	2019-04-05 10:50:56.138104353 +0200
@@ -118,6 +118,8 @@
 #ifdef OPENSSL_FIPS
 # include <openssl/fips.h>
 #endif
+#include "bn_int.h"
+#include "constant_time_locl.h"
 
 #ifndef RSA_NULL
 
@@ -160,7 +162,7 @@ static int RSA_eay_public_encrypt(int fl
                                   unsigned char *to, RSA *rsa, int padding)
 {
     BIGNUM *f, *ret;
-    int i, j, k, num = 0, r = -1;
+    int i, num = 0, r = -1;
     unsigned char *buf = NULL;
     BN_CTX *ctx = NULL;
 
@@ -252,15 +254,10 @@ static int RSA_eay_public_encrypt(int fl
         goto err;
 
     /*
-     * put in leading 0 bytes if the number is less than the length of the
-     * modulus
+     * BN_bn2binpad puts in leading 0 bytes if the number is less than
+     * the length of the modulus.
      */
-    j = BN_num_bytes(ret);
-    i = BN_bn2bin(ret, &(to[num - j]));
-    for (k = 0; k < (num - i); k++)
-        to[k] = 0;
-
-    r = num;
+    r = bn_bn2binpad(ret, to, num);
  err:
     if (ctx != NULL) {
         BN_CTX_end(ctx);
@@ -369,7 +366,7 @@ static int RSA_eay_private_encrypt(int f
                                    unsigned char *to, RSA *rsa, int padding)
 {
     BIGNUM *f, *ret, *res;
-    int i, j, k, num = 0, r = -1;
+    int i, num = 0, r = -1;
     unsigned char *buf = NULL;
     BN_CTX *ctx = NULL;
     int local_blinding = 0;
@@ -437,6 +434,11 @@ static int RSA_eay_private_encrypt(int f
         goto err;
     }
 
+    if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, CRYPTO_LOCK_RSA,
+                                    rsa->n, ctx))
+            goto err;
+
     if (!(rsa->flags & RSA_FLAG_NO_BLINDING)) {
         blinding = rsa_get_blinding(rsa, &local_blinding, ctx);
         if (blinding == NULL) {
@@ -471,11 +473,6 @@ static int RSA_eay_private_encrypt(int f
         } else
             d = rsa->d;
 
-        if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx))
-                goto err;
-
         if (!rsa->meth->bn_mod_exp(ret, f, d, rsa->n, ctx,
                                    rsa->_method_mod_n))
             goto err;
@@ -495,15 +492,10 @@ static int RSA_eay_private_encrypt(int f
         res = ret;
 
     /*
-     * put in leading 0 bytes if the number is less than the length of the
-     * modulus
+     * BN_bn2binpad puts in leading 0 bytes if the number is less than
+     * the length of the modulus.
      */
-    j = BN_num_bytes(res);
-    i = BN_bn2bin(res, &(to[num - j]));
-    for (k = 0; k < (num - i); k++)
-        to[k] = 0;
-
-    r = num;
+    r = bn_bn2binpad(res, to, num);
  err:
     if (ctx != NULL) {
         BN_CTX_end(ctx);
@@ -521,7 +513,6 @@ static int RSA_eay_private_decrypt(int f
 {
     BIGNUM *f, *ret;
     int j, num = 0, r = -1;
-    unsigned char *p;
     unsigned char *buf = NULL;
     BN_CTX *ctx = NULL;
     int local_blinding = 0;
@@ -628,8 +619,7 @@ static int RSA_eay_private_decrypt(int f
         if (!rsa_blinding_invert(blinding, ret, unblind, ctx))
             goto err;
 
-    p = buf;
-    j = BN_bn2bin(ret, p);      /* j is only used with no-padding mode */
+    j = bn_bn2binpad(ret, buf, num);
 
     switch (padding) {
     case RSA_PKCS1_PADDING:
@@ -644,14 +634,14 @@ static int RSA_eay_private_decrypt(int f
         r = RSA_padding_check_SSLv23(to, num, buf, j, num);
         break;
     case RSA_NO_PADDING:
-        r = RSA_padding_check_none(to, num, buf, j, num);
+        memcpy(to, buf, (r = j));
         break;
     default:
         RSAerr(RSA_F_RSA_EAY_PRIVATE_DECRYPT, RSA_R_UNKNOWN_PADDING_TYPE);
         goto err;
     }
-    if (r < 0)
-        RSAerr(RSA_F_RSA_EAY_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED);
+    RSAerr(RSA_F_RSA_EAY_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED);
+    err_clear_last_constant_time(1 & ~constant_time_msb(r));
 
  err:
     if (ctx != NULL) {
@@ -671,7 +661,6 @@ static int RSA_eay_public_decrypt(int fl
 {
     BIGNUM *f, *ret;
     int i, num = 0, r = -1;
-    unsigned char *p;
     unsigned char *buf = NULL;
     BN_CTX *ctx = NULL;
 
@@ -752,8 +741,7 @@ static int RSA_eay_public_decrypt(int fl
         if (!BN_sub(ret, rsa->n, ret))
             goto err;
 
-    p = buf;
-    i = BN_bn2bin(ret, p);
+    i = bn_bn2binpad(ret, buf, num);
 
     switch (padding) {
     case RSA_PKCS1_PADDING:
@@ -763,7 +751,7 @@ static int RSA_eay_public_decrypt(int fl
         r = RSA_padding_check_X931(to, num, buf, i, num);
         break;
     case RSA_NO_PADDING:
-        r = RSA_padding_check_none(to, num, buf, i, num);
+        memcpy(to, buf, (r = i));
         break;
     default:
         RSAerr(RSA_F_RSA_EAY_PUBLIC_DECRYPT, RSA_R_UNKNOWN_PADDING_TYPE);
@@ -789,7 +777,7 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
     BIGNUM *r1, *m1, *vrfy;
     BIGNUM local_dmp1, local_dmq1, local_c, local_r1;
     BIGNUM *dmp1, *dmq1, *c, *pr1;
-    int ret = 0;
+    int ret = 0, smooth = 0;
 
     BN_CTX_start(ctx);
     r1 = BN_CTX_get(ctx);
@@ -824,6 +812,9 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
             if (!BN_MONT_CTX_set_locked
                 (&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx))
                 goto err;
+
+            smooth = (rsa->meth->bn_mod_exp == BN_mod_exp_mont)
+                     && (BN_num_bits(q) == BN_num_bits(p));
         }
     }
 
@@ -832,6 +823,47 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
             (&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx))
             goto err;
 
+    if (smooth) {
+        /*
+         * Conversion from Montgomery domain, a.k.a. Montgomery reduction,
+         * accepts values in [0-m*2^w) range. w is m's bit width rounded up
+         * to limb width. So that at the very least if |I| is fully reduced,
+         * i.e. less than p*q, we can count on from-to round to perform
+         * below modulo operations on |I|. Unlike BN_mod it's constant time.
+         */
+        if (/* m1 = I moq q */
+            !bn_from_mont_fixed_top(m1, I, rsa->_method_mod_q, ctx)
+            || !bn_to_mont_fixed_top(m1, m1, rsa->_method_mod_q, ctx)
+            /* m1 = m1^dmq1 mod q */
+            || !BN_mod_exp_mont_consttime(m1, m1, rsa->dmq1, rsa->q, ctx,
+                                          rsa->_method_mod_q)
+            /* r1 = I mod p */
+            || !bn_from_mont_fixed_top(r1, I, rsa->_method_mod_p, ctx)
+            || !bn_to_mont_fixed_top(r1, r1, rsa->_method_mod_p, ctx)
+            /* r1 = r1^dmp1 mod p */
+            || !BN_mod_exp_mont_consttime(r1, r1, rsa->dmp1, rsa->p, ctx,
+                                          rsa->_method_mod_p)
+            /* r1 = (r1 - m1) mod p */
+            /*
+             * bn_mod_sub_fixed_top is not regular modular subtraction,
+             * it can tolerate subtrahend to be larger than modulus, but
+             * not bit-wise wider. This makes up for uncommon q>p case,
+             * when |m1| can be larger than |rsa->p|.
+             */
+            || !bn_mod_sub_fixed_top(r1, r1, m1, rsa->p)
+
+            /* r1 = r1 * iqmp mod p */
+            || !bn_to_mont_fixed_top(r1, r1, rsa->_method_mod_p, ctx)
+            || !bn_mul_mont_fixed_top(r1, r1, rsa->iqmp, rsa->_method_mod_p,
+                                      ctx)
+            /* r0 = r1 * q + m1 */
+            || !bn_mul_fixed_top(r0, r1, rsa->q, ctx)
+            || !bn_mod_add_fixed_top(r0, r0, m1, rsa->n))
+            goto err;
+
+        goto tail;
+    }
+
     /* compute I mod q */
     if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
         c = &local_c;
@@ -909,10 +941,18 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
     if (!BN_add(r0, r1, m1))
         goto err;
 
+ tail:
     if (rsa->e && rsa->n) {
-        if (!rsa->meth->bn_mod_exp(vrfy, r0, rsa->e, rsa->n, ctx,
-                                   rsa->_method_mod_n))
-            goto err;
+        if (rsa->meth->bn_mod_exp == BN_mod_exp_mont) {
+            if (!BN_mod_exp_mont(vrfy, r0, rsa->e, rsa->n, ctx,
+                                 rsa->_method_mod_n))
+                goto err;
+        } else {
+            bn_correct_top(r0);
+            if (!rsa->meth->bn_mod_exp(vrfy, r0, rsa->e, rsa->n, ctx,
+                                       rsa->_method_mod_n))
+                goto err;
+        }
         /*
          * If 'I' was greater than (or equal to) rsa->n, the operation will
          * be equivalent to using 'I mod n'. However, the result of the
@@ -921,6 +961,11 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
          */
         if (!BN_sub(vrfy, vrfy, I))
             goto err;
+        if (BN_is_zero(vrfy)) {
+            bn_correct_top(r0);
+            ret = 1;
+            goto err;   /* not actually error */
+        }
         if (!BN_mod(vrfy, vrfy, rsa->n, ctx))
             goto err;
         if (BN_is_negative(vrfy))
@@ -946,6 +991,15 @@ static int RSA_eay_mod_exp(BIGNUM *r0, c
                 goto err;
         }
     }
+    /*
+     * It's unfortunate that we have to bn_correct_top(r0). What hopefully
+     * saves the day is that correction is highly unlike, and private key
+     * operations are customarily performed on blinded message. Which means
+     * that attacker won't observe correlation with chosen plaintext.
+     * Secondly, remaining code would still handle it in same computational
+     * time and even conceal memory access pattern around corrected top.
+     */
+    bn_correct_top(r0);
     ret = 1;
  err:
     BN_CTX_end(ctx);
diff -up openssl-1.0.2k/crypto/rsa/rsa_oaep.c.9-lives openssl-1.0.2k/crypto/rsa/rsa_oaep.c
--- openssl-1.0.2k/crypto/rsa/rsa_oaep.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/rsa/rsa_oaep.c	2019-04-05 10:50:56.138104353 +0200
@@ -120,8 +120,8 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(un
                                       int plen, const EVP_MD *md,
                                       const EVP_MD *mgf1md)
 {
-    int i, dblen, mlen = -1, one_index = 0, msg_index;
-    unsigned int good, found_one_byte;
+    int i, dblen = 0, mlen = -1, one_index = 0, msg_index;
+    unsigned int good = 0, found_one_byte, mask;
     const unsigned char *maskedseed, *maskeddb;
     /*
      * |em| is the encoded message, zero-padded to exactly |num| bytes: em =
@@ -144,31 +144,42 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(un
      * |num| is the length of the modulus; |flen| is the length of the
      * encoded message. Therefore, for any |from| that was obtained by
      * decrypting a ciphertext, we must have |flen| <= |num|. Similarly,
-     * num < 2 * mdlen + 2 must hold for the modulus irrespective of
+     * |num| >= 2 * |mdlen| + 2 must hold for the modulus irrespective of
      * the ciphertext, see PKCS #1 v2.2, section 7.1.2.
      * This does not leak any side-channel information.
      */
-    if (num < flen || num < 2 * mdlen + 2)
-        goto decoding_err;
+    if (num < flen || num < 2 * mdlen + 2) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
+               RSA_R_OAEP_DECODING_ERROR);
+        return -1;
+    }
 
     dblen = num - mdlen - 1;
     db = OPENSSL_malloc(dblen);
-    em = OPENSSL_malloc(num);
-    if (db == NULL || em == NULL) {
+    if (db == NULL) {
         RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE);
         goto cleanup;
     }
 
+    em = OPENSSL_malloc(num);
+    if (em == NULL) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
+               ERR_R_MALLOC_FAILURE);
+        goto cleanup;
+    }
+
     /*
-     * Always do this zero-padding copy (even when num == flen) to avoid
-     * leaking that information. The copy still leaks some side-channel
-     * information, but it's impossible to have a fixed  memory access
-     * pattern since we can't read out of the bounds of |from|.
-     *
-     * TODO(emilia): Consider porting BN_bn2bin_padded from BoringSSL.
-     */
-    memset(em, 0, num);
-    memcpy(em + num - flen, from, flen);
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
+    }
 
     /*
      * The first byte must be zero, however we must not leak if this is
@@ -215,33 +226,53 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(un
      * so plaintext-awareness ensures timing side-channels are no longer a
      * concern.
      */
-    if (!good)
-        goto decoding_err;
-
     msg_index = one_index + 1;
     mlen = dblen - msg_index;
 
-    if (tlen < mlen) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE);
-        mlen = -1;
-    } else {
-        memcpy(to, db + msg_index, mlen);
-        goto cleanup;
+    /*
+     * For good measure, do this check in constant time as well.
+     */
+    good &= constant_time_ge(tlen, mlen);
+
+    /*
+     * Move the result in-place by |dblen|-|mdlen|-1-|mlen| bytes to the left.
+     * Then if |good| move |mlen| bytes from |db|+|mdlen|+1 to |to|.
+     * Otherwise leave |to| unchanged.
+     * Copy the memory back in a way that does not reveal the size of
+     * the data being copied via a timing side channel. This requires copying
+     * parts of the buffer multiple times based on the bits set in the real
+     * length. Clear bits do a non-copy with identical access pattern.
+     * The loop below has overall complexity of O(N*log(N)).
+     */
+    tlen = constant_time_select_int(constant_time_lt(dblen - mdlen - 1, tlen),
+                                    dblen - mdlen - 1, tlen);
+    for (msg_index = 1; msg_index < dblen - mdlen - 1; msg_index <<= 1) {
+        mask = ~constant_time_eq(msg_index & (dblen - mdlen - 1 - mlen), 0);
+        for (i = mdlen + 1; i < dblen - msg_index; i++)
+            db[i] = constant_time_select_8(mask, db[i + msg_index], db[i]);
+    }
+    for (i = 0; i < tlen; i++) {
+        mask = good & constant_time_lt(i, mlen);
+        to[i] = constant_time_select_8(mask, db[i + mdlen + 1], to[i]);
     }
 
- decoding_err:
     /*
      * To avoid chosen ciphertext attacks, the error message should not
      * reveal which kind of decoding error happened.
      */
     RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
            RSA_R_OAEP_DECODING_ERROR);
+    err_clear_last_constant_time(1 & good);
  cleanup:
+    OPENSSL_cleanse(seed, sizeof(seed));
     if (db != NULL)
-        OPENSSL_free(db);
+        OPENSSL_cleanse(db, dblen);
+    OPENSSL_free(db);
     if (em != NULL)
-        OPENSSL_free(em);
-    return mlen;
+        OPENSSL_cleanse(em, num);
+    OPENSSL_free(em);
+
+    return constant_time_select_int(good, mlen, -1);
 }
 
 int PKCS1_MGF1(unsigned char *mask, long len,
diff -up openssl-1.0.2k/crypto/rsa/rsa_pk1.c.9-lives openssl-1.0.2k/crypto/rsa/rsa_pk1.c
--- openssl-1.0.2k/crypto/rsa/rsa_pk1.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/rsa/rsa_pk1.c	2019-04-05 10:50:56.139104335 +0200
@@ -98,6 +98,27 @@ int RSA_padding_check_PKCS1_type_1(unsig
     const unsigned char *p;
 
     p = from;
+
+    /*
+     * The format is
+     * 00 || 01 || PS || 00 || D
+     * PS - padding string, at least 8 bytes of FF
+     * D  - data.
+     */
+
+    if (num < 11)
+        return -1;
+
+    /* Accept inputs with and without the leading 0-byte. */
+    if (num == flen) {
+        if ((*p++) != 0x00) {
+            RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1,
+                   RSA_R_INVALID_PADDING);
+            return -1;
+        }
+        flen--;
+    }
+
     if ((num != (flen + 1)) || (*(p++) != 01)) {
         RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1,
                RSA_R_BLOCK_TYPE_IS_NOT_01);
@@ -186,7 +207,7 @@ int RSA_padding_check_PKCS1_type_2(unsig
     int i;
     /* |em| is the encoded message, zero-padded to exactly |num| bytes */
     unsigned char *em = NULL;
-    unsigned int good, found_zero_byte;
+    unsigned int good, found_zero_byte, mask;
     int zero_index = 0, msg_index, mlen = -1;
 
     if (tlen < 0 || flen < 0)
@@ -197,37 +218,40 @@ int RSA_padding_check_PKCS1_type_2(unsig
      * section 7.2.2.
      */
 
-    if (flen > num)
-        goto err;
-
-    if (num < 11)
-        goto err;
+    if (flen > num || num < 11) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,
+               RSA_R_PKCS_DECODING_ERROR);
+        return -1;
+    }
 
     em = OPENSSL_malloc(num);
     if (em == NULL) {
         RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE);
         return -1;
     }
-    memset(em, 0, num);
     /*
-     * Always do this zero-padding copy (even when num == flen) to avoid
-     * leaking that information. The copy still leaks some side-channel
-     * information, but it's impossible to have a fixed  memory access
-     * pattern since we can't read out of the bounds of |from|.
-     *
-     * TODO(emilia): Consider porting BN_bn2bin_padded from BoringSSL.
-     */
-    memcpy(em + num - flen, from, flen);
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
+    }
 
     good = constant_time_is_zero(em[0]);
     good &= constant_time_eq(em[1], 2);
 
+    /* scan over padding data */
     found_zero_byte = 0;
     for (i = 2; i < num; i++) {
         unsigned int equals0 = constant_time_is_zero(em[i]);
-        zero_index =
-            constant_time_select_int(~found_zero_byte & equals0, i,
-                                     zero_index);
+
+        zero_index = constant_time_select_int(~found_zero_byte & equals0,
+                                              i, zero_index);
         found_zero_byte |= equals0;
     }
 
@@ -236,7 +260,7 @@ int RSA_padding_check_PKCS1_type_2(unsig
      * If we never found a 0-byte, then |zero_index| is 0 and the check
      * also fails.
      */
-    good &= constant_time_ge((unsigned int)(zero_index), 2 + 8);
+    good &= constant_time_ge(zero_index, 2 + 8);
 
     /*
      * Skip the zero byte. This is incorrect if we never found a zero-byte
@@ -246,30 +270,36 @@ int RSA_padding_check_PKCS1_type_2(unsig
     mlen = num - msg_index;
 
     /*
-     * For good measure, do this check in constant time as well; it could
-     * leak something if |tlen| was assuming valid padding.
+     * For good measure, do this check in constant time as well.
      */
-    good &= constant_time_ge((unsigned int)(tlen), (unsigned int)(mlen));
+    good &= constant_time_ge(tlen, mlen);
 
     /*
-     * We can't continue in constant-time because we need to copy the result
-     * and we cannot fake its length. This unavoidably leaks timing
-     * information at the API boundary.
-     * TODO(emilia): this could be addressed at the call site,
-     * see BoringSSL commit 0aa0767340baf925bda4804882aab0cb974b2d26.
-     */
-    if (!good) {
-        mlen = -1;
-        goto err;
-    }
+     * Move the result in-place by |num|-11-|mlen| bytes to the left.
+     * Then if |good| move |mlen| bytes from |em|+11 to |to|.
+     * Otherwise leave |to| unchanged.
+     * Copy the memory back in a way that does not reveal the size of
+     * the data being copied via a timing side channel. This requires copying
+     * parts of the buffer multiple times based on the bits set in the real
+     * length. Clear bits do a non-copy with identical access pattern.
+     * The loop below has overall complexity of O(N*log(N)).
+     */
+    tlen = constant_time_select_int(constant_time_lt(num - 11, tlen),
+                                    num - 11, tlen);
+    for (msg_index = 1; msg_index < num - 11; msg_index <<= 1) {
+        mask = ~constant_time_eq(msg_index & (num - 11 - mlen), 0);
+        for (i = 11; i < num - msg_index; i++)
+            em[i] = constant_time_select_8(mask, em[i + msg_index], em[i]);
+    }
+    for (i = 0; i < tlen; i++) {
+        mask = good & constant_time_lt(i, mlen);
+        to[i] = constant_time_select_8(mask, em[i + 11], to[i]);
+    }
+
+    OPENSSL_cleanse(em, num);
+    OPENSSL_free(em);
+    RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, RSA_R_PKCS_DECODING_ERROR);
+    err_clear_last_constant_time(1 & good);
 
-    memcpy(to, em + msg_index, mlen);
-
- err:
-    if (em != NULL)
-        OPENSSL_free(em);
-    if (mlen == -1)
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,
-               RSA_R_PKCS_DECODING_ERROR);
-    return mlen;
+    return constant_time_select_int(good, mlen, -1);
 }
diff -up openssl-1.0.2k/crypto/rsa/rsa_ssl.c.9-lives openssl-1.0.2k/crypto/rsa/rsa_ssl.c
--- openssl-1.0.2k/crypto/rsa/rsa_ssl.c.9-lives	2017-01-26 14:22:03.000000000 +0100
+++ openssl-1.0.2k/crypto/rsa/rsa_ssl.c	2019-04-05 10:50:56.139104335 +0200
@@ -61,6 +61,7 @@
 #include <openssl/bn.h>
 #include <openssl/rsa.h>
 #include <openssl/rand.h>
+#include "constant_time_locl.h"
 
 int RSA_padding_add_SSLv23(unsigned char *to, int tlen,
                            const unsigned char *from, int flen)
@@ -101,49 +102,119 @@ int RSA_padding_add_SSLv23(unsigned char
     return (1);
 }
 
+/*
+ * Copy of RSA_padding_check_PKCS1_type_2 with a twist that rejects padding
+ * if nul delimiter is not preceded by 8 consecutive 0x03 bytes. It also
+ * preserves error code reporting for backward compatibility.
+ */
 int RSA_padding_check_SSLv23(unsigned char *to, int tlen,
                              const unsigned char *from, int flen, int num)
 {
-    int i, j, k;
-    const unsigned char *p;
+    int i;
+    /* |em| is the encoded message, zero-padded to exactly |num| bytes */
+    unsigned char *em = NULL;
+    unsigned int good, found_zero_byte, mask, threes_in_row;
+    int zero_index = 0, msg_index, mlen = -1, err;
 
-    p = from;
-    if (flen < 10) {
+    if (tlen <= 0 || flen <= 0)
+        return -1;
+
+    if (flen > num || num < 11) {
         RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_SMALL);
         return (-1);
     }
-    if ((num != (flen + 1)) || (*(p++) != 02)) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_BLOCK_TYPE_IS_NOT_02);
-        return (-1);
-    }
 
-    /* scan over padding data */
-    j = flen - 1;               /* one for type */
-    for (i = 0; i < j; i++)
-        if (*(p++) == 0)
-            break;
-
-    if ((i == j) || (i < 8)) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23,
-               RSA_R_NULL_BEFORE_BLOCK_MISSING);
-        return (-1);
-    }
-    for (k = -9; k < -1; k++) {
-        if (p[k] != 0x03)
-            break;
-    }
-    if (k == -1) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_SSLV3_ROLLBACK_ATTACK);
-        return (-1);
-    }
+    em = OPENSSL_malloc(num);
+    if (em == NULL) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, ERR_R_MALLOC_FAILURE);
+        return -1;
+    }
+    /*
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
+    }
+
+    good = constant_time_is_zero(em[0]);
+    good &= constant_time_eq(em[1], 2);
+    err = constant_time_select_int(good, 0, RSA_R_BLOCK_TYPE_IS_NOT_02);
+    mask = ~good;
 
-    i++;                        /* Skip over the '\0' */
-    j -= i;
-    if (j > tlen) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_LARGE);
-        return (-1);
-    }
-    memcpy(to, p, (unsigned int)j);
+    /* scan over padding data */
+    found_zero_byte = 0;
+    threes_in_row = 0;
+    for (i = 2; i < num; i++) {
+        unsigned int equals0 = constant_time_is_zero(em[i]);
+
+        zero_index = constant_time_select_int(~found_zero_byte & equals0,
+                                              i, zero_index);
+        found_zero_byte |= equals0;
+
+        threes_in_row += 1 & ~found_zero_byte;
+        threes_in_row &= found_zero_byte | constant_time_eq(em[i], 3);
+    }
+
+    /*
+     * PS must be at least 8 bytes long, and it starts two bytes into |em|.
+     * If we never found a 0-byte, then |zero_index| is 0 and the check
+     * also fails.
+     */
+    good &= constant_time_ge(zero_index, 2 + 8);
+    err = constant_time_select_int(mask | good, err,
+                                   RSA_R_NULL_BEFORE_BLOCK_MISSING);
+    mask = ~good;
+
+    good &= constant_time_ge(threes_in_row, 8);
+    err = constant_time_select_int(mask | good, err,
+                                   RSA_R_SSLV3_ROLLBACK_ATTACK);
+    mask = ~good;
+
+    /*
+     * Skip the zero byte. This is incorrect if we never found a zero-byte
+     * but in this case we also do not copy the message out.
+     */
+    msg_index = zero_index + 1;
+    mlen = num - msg_index;
+
+    /*
+     * For good measure, do this check in constant time as well.
+     */
+    good &= constant_time_ge(tlen, mlen);
+    err = constant_time_select_int(mask | good, err, RSA_R_DATA_TOO_LARGE);
+
+    /*
+     * Move the result in-place by |num|-11-|mlen| bytes to the left.
+     * Then if |good| move |mlen| bytes from |em|+11 to |to|.
+     * Otherwise leave |to| unchanged.
+     * Copy the memory back in a way that does not reveal the size of
+     * the data being copied via a timing side channel. This requires copying
+     * parts of the buffer multiple times based on the bits set in the real
+     * length. Clear bits do a non-copy with identical access pattern.
+     * The loop below has overall complexity of O(N*log(N)).
+     */
+    tlen = constant_time_select_int(constant_time_lt(num - 11, tlen),
+                                    num - 11, tlen);
+    for (msg_index = 1; msg_index < num - 11; msg_index <<= 1) {
+        mask = ~constant_time_eq(msg_index & (num - 11 - mlen), 0);
+        for (i = 11; i < num - msg_index; i++)
+            em[i] = constant_time_select_8(mask, em[i + msg_index], em[i]);
+    }
+    for (i = 0; i < tlen; i++) {
+        mask = good & constant_time_lt(i, mlen);
+        to[i] = constant_time_select_8(mask, em[i + 11], to[i]);
+    }
+
+    OPENSSL_cleanse(em, num);
+    OPENSSL_free(em);
+    RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, err);
+    err_clear_last_constant_time(1 & good);
 
-    return (j);
+    return constant_time_select_int(good, mlen, -1);
 }
diff -up openssl-1.0.2k/doc/crypto/RSA_padding_add_PKCS1_type_1.pod.9-lives openssl-1.0.2k/doc/crypto/RSA_padding_add_PKCS1_type_1.pod
--- openssl-1.0.2k/doc/crypto/RSA_padding_add_PKCS1_type_1.pod.9-lives	2017-01-26 14:22:04.000000000 +0100
+++ openssl-1.0.2k/doc/crypto/RSA_padding_add_PKCS1_type_1.pod	2019-04-05 10:50:56.139104335 +0200
@@ -104,6 +104,18 @@ The RSA_padding_check_xxx() functions re
 recovered data, -1 on error. Error codes can be obtained by calling
 L<ERR_get_error(3)|ERR_get_error(3)>.
 
+=head1 WARNING
+
+The RSA_padding_check_PKCS1_type_2() padding check leaks timing
+information which can potentially be used to mount a Bleichenbacher
+padding oracle attack. This is an inherent weakness in the PKCS #1
+v1.5 padding design. Prefer PKCS1_OAEP padding. Otherwise it can
+be recommended to pass zero-padded B<f>, so that B<fl> equals to
+B<rsa_len>, and if fixed by protocol, B<tlen> being set to the
+expected length. In such case leakage would be minimal, it would
+take attacker's ability to observe memory access pattern with byte
+granilarity as it occurs, post-factum timing analysis won't do.
+
 =head1 SEE ALSO
 
 L<RSA_public_encrypt(3)|RSA_public_encrypt(3)>,