Blob Blame History Raw
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
index 4d7f0add..b9a40130 100644
--- a/cipher/crc-ppc.c
+++ b/cipher/crc-ppc.c
@@ -154,26 +154,63 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
 #ifdef WORDS_BIGENDIAN
 # define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 # define CRC_VEC_U64_LOAD(offs, ptr) \
-          asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+	  asm_swap_u64(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) \
-	  CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+	  CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) \
-         vec_vsx_ld((offs), (const unsigned long long *)(ptr))
+	  asm_vec_u64_load(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 # define CRC_VEC_SWAP_TO_BE(v) (v)
 # define VEC_U64_LO 1
 # define VEC_U64_HI 0
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+		      : "=wa" (vecu64)
+		      : "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+		      : "=wa" (vecu64)
+		      : "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+  return vecu64;
+}
 #else
 # define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
-# define CRC_VEC_U64_LOAD(offs, ptr) \
-	  vec_vsx_ld((offs), (const unsigned long long *)(ptr))
-# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) (v)
 # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 # define VEC_U64_LO 0
 # define VEC_U64_HI 1
 
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_le(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+		      : "=wa" (vecu64)
+		      : "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+		      : "=wa" (vecu64)
+		      : "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+  return asm_swap_u64(vecu64);
+}
+
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_be(unsigned int offset, const void *ptr)
 {
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
index a758e1ea..31ea25bf 100644
--- a/cipher/sha512-ppc.c
+++ b/cipher/sha512-ppc.c
@@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
 {
-  asm ("vshasigmad %0,%1,%2,%3"
-       : "=v" (v)
-       : "v" (v), "g" (a), "g" (b)
-       : "memory");
+  __asm__ ("vshasigmad %0,%1,%2,%3"
+	   : "=v" (v)
+	   : "v" (v), "g" (a), "g" (b)
+	   : "memory");
   return v;
 }
 
 
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %x0,0,%1\n\t"
+	     : "=wa" (vecu64)
+	     : "r" ((uintptr_t)ptr)
+	     : "memory");
+  else
+#endif
+    __asm__ ("lxvd2x %x0,%1,%2\n\t"
+	     : "=wa" (vecu64)
+	     : "r" (offset), "r" ((uintptr_t)ptr)
+	     : "memory", "r0");
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+	   : "=wa" (vecu64)
+	   : "wa" (vecu64));
+#endif
+  return vecu64;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+	   : "=wa" (vecu64)
+	   : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+	     :
+	     : "wa" (vecu64), "r" ((uintptr_t)ptr)
+	     : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+	     :
+	     : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+	     : "memory", "r0");
+}
+
+
 /* SHA2 round in vector registers */
 #define R(a,b,c,d,e,f,g,h,k,w) do                             \
     {                                                         \
@@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
   u64 w[16];
 
-  h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state);
+  h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
   h1 = vec_rol_elems (h0, 1);
-  h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state);
+  h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
   h3 = vec_rol_elems (h2, 1);
-  h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state);
+  h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
   h5 = vec_rol_elems (h4, 1);
-  h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state);
+  h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
   h7 = vec_rol_elems (h6, 1);
 
   while (nblks >= 2)
@@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   h2 = vec_merge_idx0_elems (h2, h3);
   h4 = vec_merge_idx0_elems (h4, h5);
   h6 = vec_merge_idx0_elems (h6, h7);
-  vec_vsx_st (h0, 8 * 0, (unsigned long long *)state);
-  vec_vsx_st (h2, 8 * 2, (unsigned long long *)state);
-  vec_vsx_st (h4, 8 * 4, (unsigned long long *)state);
-  vec_vsx_st (h6, 8 * 6, (unsigned long long *)state);
+  vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
+  vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
+  vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
+  vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
 
   return sizeof(w);
 }
diff --git a/configure.ac b/configure.ac
index b6b6455a..be35ce42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1745,10 +1745,12 @@ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
 	AC_COMPILE_IFELSE([AC_LANG_SOURCE(
 	[[#include <altivec.h>
 	  typedef vector unsigned char block;
+	  typedef vector unsigned int vecu32;
 	  block fn(block in)
 	  {
 	    block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-	    return vec_cipher_be (t, in);
+	    vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+	    return vec_cipher_be (t, in) ^ (block)y;
 	  }
 	  ]])],
 	[gcry_cv_cc_ppc_altivec=yes])
@@ -1769,10 +1771,12 @@ if test "$gcry_cv_cc_ppc_altivec" = "no" &&
     AC_COMPILE_IFELSE([AC_LANG_SOURCE(
       [[#include <altivec.h>
 	typedef vector unsigned char block;
+	typedef vector unsigned int vecu32;
 	block fn(block in)
 	{
 	  block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-	  return vec_cipher_be (t, in);
+	  vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+	  return vec_cipher_be (t, in) ^ (block)y;
 	}]])],
       [gcry_cv_cc_ppc_altivec_cflags=yes])])
   if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then

diff --git a/configure.ac b/configure.ac
index 202ac888..fd447906 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2562,13 +2562,13 @@ if test "$found" = "1" ; then
          GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
       ;;
       powerpc64le-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc64-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
    esac
 fi
@@ -2635,17 +2635,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
    esac
 fi
 
@@ -2667,17 +2667,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc64-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
       ;;
       powerpc-*-*)
          # Big-Endian.
          # Build with the crypto extension implementation
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
    esac
 
    if test x"$neonsupport" = xyes ; then