|
|
e0be8b |
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
|
|
|
e0be8b |
index 4d7f0add..b9a40130 100644
|
|
|
e0be8b |
--- a/cipher/crc-ppc.c
|
|
|
e0be8b |
+++ b/cipher/crc-ppc.c
|
|
|
e0be8b |
@@ -154,26 +154,63 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
|
|
|
e0be8b |
#ifdef WORDS_BIGENDIAN
|
|
|
e0be8b |
# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
|
|
|
e0be8b |
# define CRC_VEC_U64_LOAD(offs, ptr) \
|
|
|
e0be8b |
- asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
|
|
|
e0be8b |
+ asm_swap_u64(asm_vec_u64_load(offs, ptr))
|
|
|
e0be8b |
# define CRC_VEC_U64_LOAD_LE(offs, ptr) \
|
|
|
e0be8b |
- CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
|
|
|
e0be8b |
+ CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
|
|
|
e0be8b |
# define CRC_VEC_U64_LOAD_BE(offs, ptr) \
|
|
|
e0be8b |
- vec_vsx_ld((offs), (const unsigned long long *)(ptr))
|
|
|
e0be8b |
+ asm_vec_u64_load(offs, ptr)
|
|
|
e0be8b |
# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
|
|
|
e0be8b |
# define CRC_VEC_SWAP_TO_BE(v) (v)
|
|
|
e0be8b |
# define VEC_U64_LO 1
|
|
|
e0be8b |
# define VEC_U64_HI 0
|
|
|
e0be8b |
+
|
|
|
e0be8b |
+static ASM_FUNC_ATTR_INLINE vector2x_u64 /* WORDS_BIGENDIAN variant: loads one vector2x_u64 with the VSX lxvd2x instruction and returns it unmodified. */
|
|
|
e0be8b |
+asm_vec_u64_load(unsigned long offset, const void *ptr) /* offset and ptr are the two address operands handed to lxvd2x. */
|
|
|
e0be8b |
+{
|
|
|
e0be8b |
+ vector2x_u64 vecu64;
|
|
|
e0be8b |
+#if __GNUC__ >= 4
|
|
|
e0be8b |
+ if (__builtin_constant_p (offset) && offset == 0) /* Compile-time zero offset: use the form with a literal 0 operand so no register is spent on the offset. */
|
|
|
e0be8b |
+ __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory");
|
|
|
e0be8b |
+ else
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+ __asm__ volatile ("lxvd2x %x0,%1,%2\n\t" /* General form: offset and ptr are both passed in general-purpose registers. */
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" (offset), "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory", "r0"); /* NOTE(review): the r0 clobber presumably stops the compiler from allocating r0 to an operand, since the first address field of lxvd2x would read r0 as zero -- confirm against the Power ISA. */
|
|
|
e0be8b |
+ return vecu64; /* No swap is applied in this big-endian variant. */
|
|
|
e0be8b |
+}
|
|
|
e0be8b |
#else
|
|
|
e0be8b |
# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
|
|
|
e0be8b |
-# define CRC_VEC_U64_LOAD(offs, ptr) \
|
|
|
e0be8b |
- vec_vsx_ld((offs), (const unsigned long long *)(ptr))
|
|
|
e0be8b |
-# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
|
|
|
e0be8b |
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
|
|
|
e0be8b |
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
|
|
|
e0be8b |
# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
|
|
|
e0be8b |
# define CRC_VEC_SWAP_TO_LE(v) (v)
|
|
|
e0be8b |
# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
|
|
|
e0be8b |
# define VEC_U64_LO 0
|
|
|
e0be8b |
# define VEC_U64_HI 1
|
|
|
e0be8b |
|
|
|
e0be8b |
+static ASM_FUNC_ATTR_INLINE vector2x_u64 /* Non-WORDS_BIGENDIAN variant: loads one vector2x_u64 with lxvd2x, then passes it through asm_swap_u64. */
|
|
|
e0be8b |
+asm_vec_u64_load_le(unsigned long offset, const void *ptr) /* offset and ptr are the two address operands handed to lxvd2x. */
|
|
|
e0be8b |
+{
|
|
|
e0be8b |
+ vector2x_u64 vecu64;
|
|
|
e0be8b |
+#if __GNUC__ >= 4
|
|
|
e0be8b |
+ if (__builtin_constant_p (offset) && offset == 0) /* Compile-time zero offset: use the form with a literal 0 operand so no register is spent on the offset. */
|
|
|
e0be8b |
+ __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory");
|
|
|
e0be8b |
+ else
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+ __asm__ volatile ("lxvd2x %x0,%1,%2\n\t" /* General form: offset and ptr are both passed in general-purpose registers. */
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" (offset), "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory", "r0"); /* NOTE(review): the r0 clobber presumably stops the compiler from allocating r0 to an operand, since the first address field of lxvd2x would read r0 as zero -- confirm against the Power ISA. */
|
|
|
e0be8b |
+ return asm_swap_u64(vecu64); /* asm_swap_u64 is defined outside this hunk; presumably it exchanges the two 64-bit elements -- verify. */
|
|
|
e0be8b |
+}
|
|
|
e0be8b |
+
|
|
|
e0be8b |
static ASM_FUNC_ATTR_INLINE vector2x_u64
|
|
|
e0be8b |
asm_vec_u64_load_be(unsigned int offset, const void *ptr)
|
|
|
e0be8b |
{
|
|
|
e0be8b |
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
|
|
|
e0be8b |
index a758e1ea..31ea25bf 100644
|
|
|
e0be8b |
--- a/cipher/sha512-ppc.c
|
|
|
e0be8b |
+++ b/cipher/sha512-ppc.c
|
|
|
e0be8b |
@@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
|
|
|
e0be8b |
static ASM_FUNC_ATTR_INLINE vector2x_u64
|
|
|
e0be8b |
vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
|
|
|
e0be8b |
{
|
|
|
e0be8b |
- asm ("vshasigmad %0,%1,%2,%3"
|
|
|
e0be8b |
- : "=v" (v)
|
|
|
e0be8b |
- : "v" (v), "g" (a), "g" (b)
|
|
|
e0be8b |
- : "memory");
|
|
|
e0be8b |
+ __asm__ ("vshasigmad %0,%1,%2,%3" /* Issues vshasigmad with v as both the source and the destination vector; a and b become operands 2 and 3. */
|
|
|
e0be8b |
+ : "=v" (v)
|
|
|
e0be8b |
+ : "v" (v), "g" (a), "g" (b) /* NOTE(review): "g" also admits register and memory operands; presumably a and b are always constants once this is inlined -- confirm. */
|
|
|
e0be8b |
+ : "memory");
|
|
|
e0be8b |
return v;
|
|
|
e0be8b |
}
|
|
|
e0be8b |
|
|
|
e0be8b |
|
|
|
e0be8b |
+static ASM_FUNC_ATTR_INLINE vector2x_u64 /* Loads one vector2x_u64 with lxvd2x; when WORDS_BIGENDIAN is not defined the result is additionally run through xxswapd. */
|
|
|
e0be8b |
+vec_u64_load(unsigned long offset, const void *ptr) /* offset and ptr are the two address operands handed to lxvd2x. */
|
|
|
e0be8b |
+{
|
|
|
e0be8b |
+ vector2x_u64 vecu64;
|
|
|
e0be8b |
+#if __GNUC__ >= 4
|
|
|
e0be8b |
+ if (__builtin_constant_p (offset) && offset == 0) /* Compile-time zero offset: use the form with a literal 0 operand so no register is spent on the offset. */
|
|
|
e0be8b |
+ __asm__ ("lxvd2x %x0,0,%1\n\t"
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory");
|
|
|
e0be8b |
+ else
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+ __asm__ ("lxvd2x %x0,%1,%2\n\t" /* General form: offset and ptr are both passed in general-purpose registers. */
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "r" (offset), "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory", "r0"); /* NOTE(review): the r0 clobber presumably stops the compiler from allocating r0 to an operand, since the first address field of lxvd2x would read r0 as zero -- confirm against the Power ISA. */
|
|
|
e0be8b |
+#ifndef WORDS_BIGENDIAN
|
|
|
e0be8b |
+ __asm__ ("xxswapd %x0, %x1" /* Little-endian builds only: xxswapd is applied with vecu64 as both input and output. */
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "wa" (vecu64));
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+ return vecu64;
|
|
|
e0be8b |
+}
|
|
|
e0be8b |
+
|
|
|
e0be8b |
+
|
|
|
e0be8b |
+static ASM_FUNC_ATTR_INLINE void /* Stores one vector2x_u64 with stxvd2x; when WORDS_BIGENDIAN is not defined the value is first run through xxswapd. */
|
|
|
e0be8b |
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr) /* vecu64: value to store; offset and ptr: the two address operands handed to stxvd2x. */
|
|
|
e0be8b |
+{
|
|
|
e0be8b |
+#ifndef WORDS_BIGENDIAN
|
|
|
e0be8b |
+ __asm__ ("xxswapd %x0, %x1" /* Little-endian builds only: swap is applied to the by-value parameter, so the caller's vector is not modified. */
|
|
|
e0be8b |
+ : "=wa" (vecu64)
|
|
|
e0be8b |
+ : "wa" (vecu64));
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+#if __GNUC__ >= 4
|
|
|
e0be8b |
+ if (__builtin_constant_p (offset) && offset == 0) /* Compile-time zero offset: use the form with a literal 0 operand so no register is spent on the offset. */
|
|
|
e0be8b |
+ __asm__ ("stxvd2x %x0,0,%1\n\t"
|
|
|
e0be8b |
+ :
|
|
|
e0be8b |
+ : "wa" (vecu64), "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory");
|
|
|
e0be8b |
+ else
|
|
|
e0be8b |
+#endif
|
|
|
e0be8b |
+ __asm__ ("stxvd2x %x0,%1,%2\n\t" /* General form: offset and ptr are both passed in general-purpose registers; the asm has no output operands. */
|
|
|
e0be8b |
+ :
|
|
|
e0be8b |
+ : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
|
|
|
e0be8b |
+ : "memory", "r0"); /* NOTE(review): the r0 clobber presumably stops the compiler from allocating r0 to an operand, since the first address field of stxvd2x would read r0 as zero -- confirm against the Power ISA. */
|
|
|
e0be8b |
+}
|
|
|
e0be8b |
+
|
|
|
e0be8b |
+
|
|
|
e0be8b |
/* SHA2 round in vector registers */
|
|
|
e0be8b |
#define R(a,b,c,d,e,f,g,h,k,w) do \
|
|
|
e0be8b |
{ \
|
|
|
e0be8b |
@@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8],
|
|
|
e0be8b |
vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
|
|
|
e0be8b |
u64 w[16];
|
|
|
e0be8b |
|
|
|
e0be8b |
- h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state);
|
|
|
e0be8b |
+ h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
|
|
|
e0be8b |
h1 = vec_rol_elems (h0, 1);
|
|
|
e0be8b |
- h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state);
|
|
|
e0be8b |
+ h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
|
|
|
e0be8b |
h3 = vec_rol_elems (h2, 1);
|
|
|
e0be8b |
- h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state);
|
|
|
e0be8b |
+ h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
|
|
|
e0be8b |
h5 = vec_rol_elems (h4, 1);
|
|
|
e0be8b |
- h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state);
|
|
|
e0be8b |
+ h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
|
|
|
e0be8b |
h7 = vec_rol_elems (h6, 1);
|
|
|
e0be8b |
|
|
|
e0be8b |
while (nblks >= 2)
|
|
|
e0be8b |
@@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8],
|
|
|
e0be8b |
h2 = vec_merge_idx0_elems (h2, h3);
|
|
|
e0be8b |
h4 = vec_merge_idx0_elems (h4, h5);
|
|
|
e0be8b |
h6 = vec_merge_idx0_elems (h6, h7);
|
|
|
e0be8b |
- vec_vsx_st (h0, 8 * 0, (unsigned long long *)state);
|
|
|
e0be8b |
- vec_vsx_st (h2, 8 * 2, (unsigned long long *)state);
|
|
|
e0be8b |
- vec_vsx_st (h4, 8 * 4, (unsigned long long *)state);
|
|
|
e0be8b |
- vec_vsx_st (h6, 8 * 6, (unsigned long long *)state);
|
|
|
e0be8b |
+ vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
|
|
|
e0be8b |
+ vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
|
|
|
e0be8b |
+ vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
|
|
|
e0be8b |
+ vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
|
|
|
e0be8b |
|
|
|
e0be8b |
return sizeof(w);
|
|
|
e0be8b |
}
|
|
|
e0be8b |
diff --git a/configure.ac b/configure.ac
|
|
|
e0be8b |
index b6b6455a..be35ce42 100644
|
|
|
e0be8b |
--- a/configure.ac
|
|
|
e0be8b |
+++ b/configure.ac
|
|
|
e0be8b |
@@ -1745,10 +1745,12 @@ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
|
|
|
e0be8b |
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
|
|
e0be8b |
[[#include <altivec.h>
|
|
|
e0be8b |
typedef vector unsigned char block;
|
|
|
e0be8b |
+ typedef vector unsigned int vecu32;
|
|
|
e0be8b |
block fn(block in)
|
|
|
e0be8b |
{
|
|
|
e0be8b |
block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
|
|
|
e0be8b |
- return vec_cipher_be (t, in);
|
|
|
e0be8b |
+ vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
|
|
|
e0be8b |
+ return vec_cipher_be (t, in) ^ (block)y;
|
|
|
e0be8b |
}
|
|
|
e0be8b |
]])],
|
|
|
e0be8b |
[gcry_cv_cc_ppc_altivec=yes])
|
|
|
e0be8b |
@@ -1769,10 +1771,12 @@ if test "$gcry_cv_cc_ppc_altivec" = "no" &&
|
|
|
e0be8b |
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
|
|
|
e0be8b |
[[#include <altivec.h>
|
|
|
e0be8b |
typedef vector unsigned char block;
|
|
|
e0be8b |
+ typedef vector unsigned int vecu32;
|
|
|
e0be8b |
block fn(block in)
|
|
|
e0be8b |
{
|
|
|
e0be8b |
block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
|
|
|
e0be8b |
- return vec_cipher_be (t, in);
|
|
|
e0be8b |
+ vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
|
|
|
e0be8b |
+ return vec_cipher_be (t, in) ^ (block)y;
|
|
|
e0be8b |
}]])],
|
|
|
e0be8b |
[gcry_cv_cc_ppc_altivec_cflags=yes])])
|
|
|
e0be8b |
if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
|
|
|
e0be8b |
|
|
|
e0be8b |
diff --git a/configure.ac b/configure.ac
|
|
|
e0be8b |
index 202ac888..fd447906 100644
|
|
|
e0be8b |
--- a/configure.ac
|
|
|
e0be8b |
+++ b/configure.ac
|
|
|
e0be8b |
@@ -2562,13 +2562,13 @@ if test "$found" = "1" ; then
|
|
|
e0be8b |
GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64le-*-*)
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64-*-*)
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc-*-*)
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
esac
|
|
|
e0be8b |
fi
|
|
|
e0be8b |
@@ -2635,17 +2635,17 @@ if test "$found" = "1" ; then
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64le-*-*)
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64-*-*)
|
|
|
e0be8b |
# Big-Endian.
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc-*-*)
|
|
|
e0be8b |
# Big-Endian.
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
|
|
|
e0be8b |
esac
|
|
|
e0be8b |
fi
|
|
|
e0be8b |
|
|
|
e0be8b |
@@ -2667,17 +2667,17 @@ if test "$found" = "1" ; then
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64le-*-*)
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc64-*-*)
|
|
|
e0be8b |
# Big-Endian.
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
|
|
|
e0be8b |
;;
|
|
|
e0be8b |
powerpc-*-*)
|
|
|
e0be8b |
# Big-Endian.
|
|
|
e0be8b |
# Build with the crypto extension implementation
|
|
|
e0be8b |
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
|
|
|
e0be8b |
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
|
|
|
e0be8b |
esac
|
|
|
e0be8b |
|
|
|
e0be8b |
if test x"$neonsupport" = xyes ; then
|