3eeb22
diff -up openssl-1.0.2k/crypto/bn/bn_exp.c.one-and-done openssl-1.0.2k/crypto/bn/bn_exp.c
3eeb22
--- openssl-1.0.2k/crypto/bn/bn_exp.c.one-and-done	2019-04-04 16:46:21.287257363 +0200
3eeb22
+++ openssl-1.0.2k/crypto/bn/bn_exp.c	2019-04-04 16:45:32.875130057 +0200
3eeb22
@@ -579,7 +579,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BI
3eeb22
     return (ret);
3eeb22
 }
3eeb22
 
3eeb22
-#if defined(SPARC_T4_MONT)
3eeb22
 static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
3eeb22
 {
3eeb22
     BN_ULONG ret = 0;
3eeb22
@@ -598,7 +597,6 @@ static BN_ULONG bn_get_bits(const BIGNUM
3eeb22
 
3eeb22
     return ret & BN_MASK2;
3eeb22
 }
3eeb22
-#endif
3eeb22
 
3eeb22
 /*
3eeb22
  * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
3eeb22
@@ -697,7 +695,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr
3eeb22
                               const BIGNUM *m, BN_CTX *ctx,
3eeb22
                               BN_MONT_CTX *in_mont)
3eeb22
 {
3eeb22
-    int i, bits, ret = 0, window, wvalue;
3eeb22
+    int i, bits, ret = 0, window, wvalue, wmask, window0;
3eeb22
     int top;
3eeb22
     BN_MONT_CTX *mont = NULL;
3eeb22
 
3eeb22
@@ -945,20 +943,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr
3eeb22
         top /= 2;
3eeb22
         bn_flip_t4(np, mont->N.d, top);
3eeb22
 
3eeb22
-        bits--;
3eeb22
-        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
3eeb22
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
3eeb22
+        /*
3eeb22
+         * The exponent may not have a whole number of fixed-size windows.
3eeb22
+         * To simplify the main loop, the initial window has between 1 and
3eeb22
+         * full-window-size bits such that what remains is always a whole
3eeb22
+         * number of windows.
3eeb22
+         */
3eeb22
+        window0 = (bits - 1) % 5 + 1;
3eeb22
+        wmask = (1 << window0) - 1;
3eeb22
+        bits -= window0;
3eeb22
+        wvalue = bn_get_bits(p, bits) & wmask;
3eeb22
         bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
3eeb22
 
3eeb22
         /*
3eeb22
          * Scan the exponent one window at a time starting from the most
3eeb22
          * significant bits.
3eeb22
          */
3eeb22
-        while (bits >= 0) {
3eeb22
+        while (bits > 0) {
3eeb22
             if (bits < stride)
3eeb22
-                stride = bits + 1;
3eeb22
+                stride = bits;
3eeb22
             bits -= stride;
3eeb22
-            wvalue = bn_get_bits(p, bits + 1);
3eeb22
+            wvalue = bn_get_bits(p, bits);
3eeb22
 
3eeb22
             if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
3eeb22
                 continue;
3eeb22
@@ -1066,32 +1071,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr
3eeb22
             bn_scatter5(tmp.d, top, powerbuf, i);
3eeb22
         }
3eeb22
 # endif
3eeb22
-        bits--;
3eeb22
-        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
3eeb22
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
3eeb22
+        /*
3eeb22
+         * The exponent may not have a whole number of fixed-size windows.
3eeb22
+         * To simplify the main loop, the initial window has between 1 and
3eeb22
+         * full-window-size bits such that what remains is always a whole
3eeb22
+         * number of windows.
3eeb22
+         */
3eeb22
+        window0 = (bits - 1) % 5 + 1;
3eeb22
+        wmask = (1 << window0) - 1;
3eeb22
+        bits -= window0;
3eeb22
+        wvalue = bn_get_bits(p, bits) & wmask;
3eeb22
         bn_gather5(tmp.d, top, powerbuf, wvalue);
3eeb22
 
3eeb22
         /*
3eeb22
          * Scan the exponent one window at a time starting from the most
3eeb22
          * significant bits.
3eeb22
          */
3eeb22
-        if (top & 7)
3eeb22
-            while (bits >= 0) {
3eeb22
-                for (wvalue = 0, i = 0; i < 5; i++, bits--)
3eeb22
-                    wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
3eeb22
-
3eeb22
+        if (top & 7) {
3eeb22
+            while (bits > 0) {
3eeb22
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
3eeb22
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
3eeb22
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
3eeb22
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
3eeb22
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
3eeb22
                 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
3eeb22
-                                    wvalue);
3eeb22
+                                    bn_get_bits5(p->d, bits -= 5));
3eeb22
+            }
3eeb22
         } else {
3eeb22
-            while (bits >= 0) {
3eeb22
-                wvalue = bn_get_bits5(p->d, bits - 4);
3eeb22
-                bits -= 5;
3eeb22
-                bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
3eeb22
+            while (bits > 0) {
3eeb22
+                bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top,
3eeb22
+                          bn_get_bits5(p->d, bits -= 5));
3eeb22
             }
3eeb22
         }
3eeb22
 
3eeb22
@@ -1133,28 +1142,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr
3eeb22
             }
3eeb22
         }
3eeb22
 
3eeb22
-        bits--;
3eeb22
-        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
3eeb22
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
3eeb22
+        /*
3eeb22
+         * The exponent may not have a whole number of fixed-size windows.
3eeb22
+         * To simplify the main loop, the initial window has between 1 and
3eeb22
+         * full-window-size bits such that what remains is always a whole
3eeb22
+         * number of windows.
3eeb22
+         */
3eeb22
+        window0 = (bits - 1) % window + 1;
3eeb22
+        wmask = (1 << window0) - 1;
3eeb22
+        bits -= window0;
3eeb22
+        wvalue = bn_get_bits(p, bits) & wmask;
3eeb22
         if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
3eeb22
                                             window))
3eeb22
             goto err;
3eeb22
 
3eeb22
+        wmask = (1 << window) - 1;
3eeb22
         /*
3eeb22
          * Scan the exponent one window at a time starting from the most
3eeb22
          * significant bits.
3eeb22
          */
3eeb22
-        while (bits >= 0) {
3eeb22
-            wvalue = 0;         /* The 'value' of the window */
3eeb22
+        while (bits > 0) {
3eeb22
 
3eeb22
-            /* Scan the window, squaring the result as we go */
3eeb22
-            for (i = 0; i < window; i++, bits--) {
3eeb22
+            /* Square the result window-size times */
3eeb22
+            for (i = 0; i < window; i++)
3eeb22
                 if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx))
3eeb22
                     goto err;
3eeb22
-                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
3eeb22
-            }
3eeb22
 
3eeb22
             /*
3eeb22
+             * Get a window's worth of bits from the exponent.
3eeb22
+             * This avoids calling BN_is_bit_set for each bit, which
3eeb22
+             * is not only slower but also makes each bit vulnerable to
3eeb22
+             * EM (and likely other) side-channel attacks like One&Done
3eeb22
+             * (for details see "One&Done: A Single-Decryption EM-Based
3eeb22
+             *  Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam,
3eeb22
+             *  H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and
3eeb22
+             *  M. Prvulovic, in USENIX Security'18)
3eeb22
+             */
3eeb22
+            bits -= window;
3eeb22
+            wvalue = bn_get_bits(p, bits) & wmask;
3eeb22
+            /*
3eeb22
              * Fetch the appropriate pre-computed value from the pre-buf
3eeb22
              */
3eeb22
             if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,