SOURCES/openssl-1.0.2a-padlock64.patch

diff -up openssl-1.0.2a/engines/e_padlock.c.padlock64 openssl-1.0.2a/engines/e_padlock.c
--- openssl-1.0.2a/engines/e_padlock.c.padlock64	2015-03-19 14:19:00.000000000 +0100
+++ openssl-1.0.2a/engines/e_padlock.c	2015-04-22 16:23:44.105617468 +0200
@@ -101,7 +101,10 @@
  */
 #  undef COMPILE_HW_PADLOCK
 #  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
+#  if (defined(__GNUC__) && __GNUC__>=2 && \
+       (defined(__i386__) || defined(__i386) || \
+        defined(__x86_64__) || defined(__x86_64)) \
+     ) || \
      (defined(_MSC_VER) && defined(_M_IX86))
 #    define COMPILE_HW_PADLOCK
 #   endif
@@ -140,7 +143,7 @@ void ENGINE_load_padlock(void)
 #    endif
 #   elif defined(__GNUC__)
 #    ifndef alloca
-#     define alloca(s) __builtin_alloca(s)
+#     define alloca(s) __builtin_alloca((s))
 #    endif
 #   endif
 
@@ -303,6 +306,7 @@ static volatile struct padlock_cipher_da
  * =======================================================
  */
 #   if defined(__GNUC__) && __GNUC__>=2
+#    if defined(__i386__) || defined(__i386)
 /*
  * As for excessive "push %ebx"/"pop %ebx" found all over.
  * When generating position-independent code GCC won't let
@@ -379,22 +383,6 @@ static int padlock_available(void)
     return padlock_use_ace + padlock_use_rng;
 }
 
-#    ifndef OPENSSL_NO_AES
-#     ifndef AES_ASM
-/* Our own htonl()/ntohl() */
-static inline void padlock_bswapl(AES_KEY *ks)
-{
-    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
-    unsigned int *key = ks->rd_key;
-
-    while (i--) {
-        asm volatile ("bswapl %0":"+r" (*key));
-        key++;
-    }
-}
-#     endif
-#    endif
-
 /*
  * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
  * stack clears EFLAGS[30] which does the trick.
@@ -404,7 +392,7 @@ static inline void padlock_reload_key(vo
     asm volatile ("pushfl; popfl");
 }
 
-#    ifndef OPENSSL_NO_AES
+#     ifndef OPENSSL_NO_AES
 /*
  * This is heuristic key context tracing. At first one
  * believes that one should use atomic swap instructions,
@@ -448,6 +436,101 @@ static inline void *name(size_t cnt,
                 : "edx", "cc", "memory");       \
         return iv;                              \
 }
+#     endif
+
+#    elif defined(__x86_64__) || defined(__x86_64)
+
+/* Load supported features of the CPU to see if
+   the PadLock is available. */
+static int padlock_available(void)
+{
+    char vendor_string[16];
+    unsigned int eax, edx;
+
+    /* Are we running on the Centaur (VIA) CPU? */
+    eax = 0x00000000;
+    vendor_string[12] = 0;
+    asm volatile ("cpuid\n"
+                  "movl   %%ebx,(%1)\n"
+                  "movl   %%edx,4(%1)\n"
+                  "movl   %%ecx,8(%1)\n":"+a" (eax):"r"(vendor_string):"rbx",
+                  "rcx", "rdx");
+    if (strcmp(vendor_string, "CentaurHauls") != 0)
+        return 0;
+
+    /* Check for Centaur Extended Feature Flags presence */
+    eax = 0xC0000000;
+    asm volatile ("cpuid":"+a" (eax)::"rbx", "rcx", "rdx");
+    if (eax < 0xC0000001)
+        return 0;
+
+    /* Read the Centaur Extended Feature Flags */
+    eax = 0xC0000001;
+    asm volatile ("cpuid":"+a" (eax), "=d"(edx)::"rbx", "rcx");
+
+    /* Fill up some flags */
+    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
+    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
+
+    return padlock_use_ace + padlock_use_rng;
+}
+
+/* Force key reload from memory to the CPU microcode.
+   Loading EFLAGS from the stack clears EFLAGS[30]
+   which does the trick. */
+static inline void padlock_reload_key(void)
+{
+    asm volatile ("pushfq; popfq");
+}
+
+#     ifndef OPENSSL_NO_AES
+/*
+ * This is heuristic key context tracing. At first one
+ * believes that one should use atomic swap instructions,
+ * but it's not actually necessary. Point is that if
+ * padlock_saved_context was changed by another thread
+ * after we've read it and before we compare it with cdata,
+ * our key *shall* be reloaded upon thread context switch
+ * and we are therefore set in either case...
+ */
+static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
+{
+    asm volatile ("pushfq\n"
+                  "       btl     $30,(%%rsp)\n"
+                  "       jnc     1f\n"
+                  "       cmpq    %2,%1\n"
+                  "       je      1f\n"
+                  "       popfq\n"
+                  "       subq    $8,%%rsp\n"
+                  "1:     addq    $8,%%rsp\n"
+                  "       movq    %2,%0":"+m" (padlock_saved_context)
+                  :"r"(padlock_saved_context), "r"(cdata):"cc");
+}
+
+/* Template for padlock_xcrypt_* modes */
+/* BIG FAT WARNING:
+ *      The offsets used with 'leal' instructions
+ *      describe items of the 'padlock_cipher_data'
+ *      structure.
+ */
+#      define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
+static inline void *name(size_t cnt,            \
+        struct padlock_cipher_data *cdata,      \
+        void *out, const void *inp)             \
+{       void *iv;                               \
+        asm volatile ( "leaq    16(%0),%%rdx\n" \
+                "       leaq    32(%0),%%rbx\n" \
+                        rep_xcrypt "\n"         \
+                : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
+                : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
+                : "rbx", "rdx", "cc", "memory");        \
+        return iv;                              \
+}
+#     endif
+
+#    endif                      /* cpu */
+
+#    ifndef OPENSSL_NO_AES
 
 /* Generate all functions with appropriate opcodes */
 /* rep xcryptecb */
@@ -458,6 +541,20 @@ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, "
     PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
 /* rep xcryptofb */
     PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
+
+#     ifndef AES_ASM
+/* Our own htonl()/ntohl() */
+static inline void padlock_bswapl(AES_KEY *ks)
+{
+    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
+    unsigned int *key = ks->rd_key;
+
+    while (i--) {
+        asm volatile ("bswapl %0":"+r" (*key));
+        key++;
+    }
+}
+#     endif
 #    endif
 /* The RNG call itself */
 static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
@@ -485,8 +582,8 @@ static inline unsigned int padlock_xstor
 static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                             size_t n)
 {
-    long *d = dst;
-    const long *s = src;
+    size_t *d = dst;
+    const size_t *s = src;
 
     n /= sizeof(*d);
     do {