|
|
96a864 |
diff -up openssl-1.0.2a/engines/e_padlock.c.padlock64 openssl-1.0.2a/engines/e_padlock.c
|
|
|
96a864 |
--- openssl-1.0.2a/engines/e_padlock.c.padlock64 2015-03-19 14:19:00.000000000 +0100
|
|
|
96a864 |
+++ openssl-1.0.2a/engines/e_padlock.c 2015-04-22 16:23:44.105617468 +0200
|
|
|
96a864 |
@@ -101,7 +101,10 @@
|
|
|
96a864 |
*/
|
|
|
96a864 |
# undef COMPILE_HW_PADLOCK
|
|
|
96a864 |
# if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
|
|
|
96a864 |
-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
|
|
|
96a864 |
+# if (defined(__GNUC__) && __GNUC__>=2 && \
|
|
|
96a864 |
+ (defined(__i386__) || defined(__i386) || \
|
|
|
96a864 |
+ defined(__x86_64__) || defined(__x86_64)) \
|
|
|
96a864 |
+ ) || \
|
|
|
96a864 |
(defined(_MSC_VER) && defined(_M_IX86))
|
|
|
96a864 |
# define COMPILE_HW_PADLOCK
|
|
|
96a864 |
# endif
|
|
|
96a864 |
@@ -140,7 +143,7 @@ void ENGINE_load_padlock(void)
|
|
|
96a864 |
# endif
|
|
|
96a864 |
# elif defined(__GNUC__)
|
|
|
96a864 |
# ifndef alloca
|
|
|
96a864 |
-# define alloca(s) __builtin_alloca(s)
|
|
|
96a864 |
+# define alloca(s) __builtin_alloca((s))
|
|
|
96a864 |
# endif
|
|
|
96a864 |
# endif
|
|
|
96a864 |
|
|
|
96a864 |
@@ -303,6 +306,7 @@ static volatile struct padlock_cipher_da
|
|
|
96a864 |
* =======================================================
|
|
|
96a864 |
*/
|
|
|
96a864 |
# if defined(__GNUC__) && __GNUC__>=2
|
|
|
96a864 |
+# if defined(__i386__) || defined(__i386)
|
|
|
96a864 |
/*
|
|
|
96a864 |
* As for excessive "push %ebx"/"pop %ebx" found all over.
|
|
|
96a864 |
* When generating position-independent code GCC won't let
|
|
|
96a864 |
@@ -379,22 +383,6 @@ static int padlock_available(void)
|
|
|
96a864 |
return padlock_use_ace + padlock_use_rng;
|
|
|
96a864 |
}
|
|
|
96a864 |
|
|
|
96a864 |
-# ifndef OPENSSL_NO_AES
|
|
|
96a864 |
-# ifndef AES_ASM
|
|
|
96a864 |
-/* Our own htonl()/ntohl() */
|
|
|
96a864 |
-static inline void padlock_bswapl(AES_KEY *ks)
|
|
|
96a864 |
-{
|
|
|
96a864 |
- size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
|
|
|
96a864 |
- unsigned int *key = ks->rd_key;
|
|
|
96a864 |
-
|
|
|
96a864 |
- while (i--) {
|
|
|
96a864 |
- asm volatile ("bswapl %0":"+r" (*key));
|
|
|
96a864 |
- key++;
|
|
|
96a864 |
- }
|
|
|
96a864 |
-}
|
|
|
96a864 |
-# endif
|
|
|
96a864 |
-# endif
|
|
|
96a864 |
-
|
|
|
96a864 |
/*
|
|
|
96a864 |
* Force key reload from memory to the CPU microcode. Loading EFLAGS from the
|
|
|
96a864 |
* stack clears EFLAGS[30] which does the trick.
|
|
|
96a864 |
@@ -404,7 +392,7 @@ static inline void padlock_reload_key(vo
|
|
|
96a864 |
asm volatile ("pushfl; popfl");
|
|
|
96a864 |
}
|
|
|
96a864 |
|
|
|
96a864 |
-# ifndef OPENSSL_NO_AES
|
|
|
96a864 |
+# ifndef OPENSSL_NO_AES
|
|
|
96a864 |
/*
|
|
|
96a864 |
* This is heuristic key context tracing. At first one
|
|
|
96a864 |
* believes that one should use atomic swap instructions,
|
|
|
96a864 |
@@ -448,6 +436,101 @@ static inline void *name(size_t cnt,
|
|
|
96a864 |
: "edx", "cc", "memory"); \
|
|
|
96a864 |
return iv; \
|
|
|
96a864 |
}
|
|
|
96a864 |
+# endif
|
|
|
96a864 |
+
|
|
|
96a864 |
+# elif defined(__x86_64__) || defined(__x86_64)
|
|
|
96a864 |
+
|
|
|
96a864 |
+/* Probe CPUID for PadLock: sets padlock_use_ace and padlock_use_rng and
|
|
|
96a864 |
+ returns their sum; returns 0 early on a non-Centaur CPU or when leaf 0xC0000001 is absent. */
|
|
|
96a864 |
+static int padlock_available(void)
|
|
|
96a864 |
+{
|
|
|
96a864 |
+ char vendor_string[16];
|
|
|
96a864 |
+ unsigned int eax, edx;
|
|
|
96a864 |
+
|
|
|
96a864 |
+ /* Are we running on the Centaur (VIA) CPU? */
|
|
|
96a864 |
+ eax = 0x00000000;
|
|
|
96a864 |
+ vendor_string[12] = 0; /* NUL-terminate the 12 bytes stored by the asm below */
|
|
|
96a864 |
+ asm volatile ("cpuid\n"
|
|
|
96a864 |
+ "movl %%ebx,(%1)\n"
|
|
|
96a864 |
+ "movl %%edx,4(%1)\n"
|
|
|
96a864 |
+ "movl %%ecx,8(%1)\n":"+a" (eax):"r"(vendor_string):"rbx",
|
|
|
96a864 |
+ "rcx", "rdx", "memory"); /* "memory": the asm writes vendor_string through %1 */
|
|
|
96a864 |
+ if (strcmp(vendor_string, "CentaurHauls") != 0)
|
|
|
96a864 |
+ return 0;
|
|
|
96a864 |
+
|
|
|
96a864 |
+ /* Check for Centaur Extended Feature Flags presence */
|
|
|
96a864 |
+ eax = 0xC0000000;
|
|
|
96a864 |
+ asm volatile ("cpuid":"+a" (eax)::"rbx", "rcx", "rdx");
|
|
|
96a864 |
+ if (eax < 0xC0000001)
|
|
|
96a864 |
+ return 0;
|
|
|
96a864 |
+
|
|
|
96a864 |
+ /* Read the Centaur Extended Feature Flags */
|
|
|
96a864 |
+ eax = 0xC0000001;
|
|
|
96a864 |
+ asm volatile ("cpuid":"+a" (eax), "=d"(edx)::"rbx", "rcx");
|
|
|
96a864 |
+
|
|
|
96a864 |
+ /* ACE is used only when EDX bits 6 and 7 are both set, RNG only when bits 2 and 3 are both set */
|
|
|
96a864 |
+ padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
|
|
|
96a864 |
+ padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
|
|
|
96a864 |
+
|
|
|
96a864 |
+ return padlock_use_ace + padlock_use_rng;
|
|
|
96a864 |
+}
|
|
|
96a864 |
+
|
|
|
96a864 |
+/* Force key reload from memory to the CPU microcode. Takes no arguments
|
|
|
96a864 |
+ and returns nothing: the pushfq/popfq pair reloads EFLAGS from the stack,
|
|
|
96a864 |
+ which clears EFLAGS[30], and that does the trick. */
|
|
|
96a864 |
+static inline void padlock_reload_key(void)
|
|
|
96a864 |
+{
|
|
|
96a864 |
+ asm volatile ("pushfq; popfq");
|
|
|
96a864 |
+}
|
|
|
96a864 |
+
|
|
|
96a864 |
+# ifndef OPENSSL_NO_AES
|
|
|
96a864 |
+/*
|
|
|
96a864 |
+ * Heuristic key-context tracking: reload the key only when bit 30 of the
|
|
|
96a864 |
+ * pushed EFLAGS image is set and padlock_saved_context differs from cdata
|
|
|
96a864 |
+ * (popfq then reloads EFLAGS from the stack); cdata is always stored as
|
|
|
96a864 |
+ * the new padlock_saved_context. No atomic swap is needed: if another
|
|
|
96a864 |
+ * thread changes padlock_saved_context between our read and our compare,
|
|
|
96a864 |
+ * the key *shall* be reloaded upon thread context switch anyway.
|
|
|
96a864 |
+ * NOTE(review): pushfq writes below %rsp; confirm no caller relies on the x86-64 red zone.
|
|
|
96a864 |
+ */
|
|
|
96a864 |
+static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
|
|
|
96a864 |
+{
|
|
|
96a864 |
+ asm volatile ("pushfq\n" /* save EFLAGS on the stack */
|
|
|
96a864 |
+ " btl $30,(%%rsp)\n" /* CF = bit 30 of the saved EFLAGS */
|
|
|
96a864 |
+ " jnc 1f\n" /* bit clear: skip the reload */
|
|
|
96a864 |
+ " cmpq %2,%1\n" /* saved context vs. cdata */
|
|
|
96a864 |
+ " je 1f\n" /* same context: skip the reload */
|
|
|
96a864 |
+ " popfq\n" /* reload EFLAGS from the stack */
|
|
|
96a864 |
+ " subq $8,%%rsp\n" /* rebalance for the addq below */
|
|
|
96a864 |
+ "1: addq $8,%%rsp\n" /* drop the pushed EFLAGS slot */
|
|
|
96a864 |
+ " movq %2,%0":"+m" (padlock_saved_context) /* record cdata as the saved context */
|
|
|
96a864 |
+ :"r"(padlock_saved_context), "r"(cdata):"cc");
|
|
|
96a864 |
+}
|
|
|
96a864 |
+
|
|
|
96a864 |
+/* Template for padlock_xcrypt_* modes: defines name(cnt, cdata, out, inp), which runs rep_xcrypt and returns the pointer left in %rax */
|
|
|
96a864 |
+/* BIG FAT WARNING:
|
|
|
96a864 |
+ * The offsets used with the 'leaq' instructions (16 and 32)
|
|
|
96a864 |
+ * describe items of the 'padlock_cipher_data'
|
|
|
96a864 |
+ * structure and must be kept in sync with its layout.
|
|
|
96a864 |
+ */
|
|
|
96a864 |
+# define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
|
|
|
96a864 |
+static inline void *name(size_t cnt, \
|
|
|
96a864 |
+ struct padlock_cipher_data *cdata, \
|
|
|
96a864 |
+ void *out, const void *inp) \
|
|
|
96a864 |
+{ void *iv; \
|
|
|
96a864 |
+ asm volatile ( "leaq 16(%0),%%rdx\n" \
|
|
|
96a864 |
+ " leaq 32(%0),%%rbx\n" \
|
|
|
96a864 |
+ rep_xcrypt "\n" \
|
|
|
96a864 |
+ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
|
|
|
96a864 |
+ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
|
|
|
96a864 |
+ : "rbx", "rdx", "cc", "memory"); \
|
|
|
96a864 |
+ return iv; \
|
|
|
96a864 |
+}
|
|
|
96a864 |
+# endif
|
|
|
96a864 |
+
|
|
|
96a864 |
+# endif /* cpu */
|
|
|
96a864 |
+
|
|
|
96a864 |
+# ifndef OPENSSL_NO_AES
|
|
|
96a864 |
|
|
|
96a864 |
/* Generate all functions with appropriate opcodes */
|
|
|
96a864 |
/* rep xcryptecb */
|
|
|
96a864 |
@@ -458,6 +541,20 @@ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, "
|
|
|
96a864 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
|
|
|
96a864 |
/* rep xcryptofb */
|
|
|
96a864 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
|
|
|
96a864 |
+
|
|
|
96a864 |
+# ifndef AES_ASM
|
|
|
96a864 |
+/* Our own htonl()/ntohl(): byte-swap every element of ks->rd_key in place */
|
|
|
96a864 |
+static inline void padlock_bswapl(AES_KEY *ks)
|
|
|
96a864 |
+{
|
|
|
96a864 |
+    const size_t nwords = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
|
|
|
96a864 |
+    unsigned int *word = ks->rd_key;
|
|
|
96a864 |
+    size_t idx;
|
|
|
96a864 |
+
|
|
|
96a864 |
+    for (idx = 0; idx < nwords; idx++) {
|
|
|
96a864 |
+        asm volatile ("bswapl %0":"+r" (word[idx]));
|
|
|
96a864 |
+    }
|
|
|
96a864 |
+}
|
|
|
96a864 |
+# endif
|
|
|
96a864 |
# endif
|
|
|
96a864 |
/* The RNG call itself */
|
|
|
96a864 |
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
|
|
|
96a864 |
@@ -485,8 +582,8 @@ static inline unsigned int padlock_xstor
|
|
|
96a864 |
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
|
|
|
96a864 |
size_t n)
|
|
|
96a864 |
{
|
|
|
96a864 |
- long *d = dst;
|
|
|
96a864 |
- const long *s = src;
|
|
|
96a864 |
+ size_t *d = dst;
|
|
|
96a864 |
+ const size_t *s = src;
|
|
|
96a864 |
|
|
|
96a864 |
n /= sizeof(*d);
|
|
|
96a864 |
do {
|