diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
index 4d7f0add..b9a40130 100644
--- a/cipher/crc-ppc.c
+++ b/cipher/crc-ppc.c
@@ -154,26 +154,63 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
 #ifdef WORDS_BIGENDIAN
 # define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
 # define CRC_VEC_U64_LOAD(offs, ptr) \
-        asm_swap_u64(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+        asm_swap_u64(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_LE(offs, ptr) \
-        CRC_VEC_SWAP(vec_vsx_ld((offs), (const unsigned long long *)(ptr)))
+        CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) \
-        vec_vsx_ld((offs), (const unsigned long long *)(ptr))
+        asm_vec_u64_load(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
 # define CRC_VEC_SWAP_TO_BE(v) (v)
 # define VEC_U64_LO 1
 # define VEC_U64_HI 0
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                      : "=wa" (vecu64)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                      : "=wa" (vecu64)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
+  return vecu64;
+}
 #else
 # define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
-# define CRC_VEC_U64_LOAD(offs, ptr) \
-        vec_vsx_ld((offs), (const unsigned long long *)(ptr))
-# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
 # define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
 # define CRC_VEC_SWAP_TO_LE(v) (v)
 # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
 # define VEC_U64_LO 0
 # define VEC_U64_HI 1
 
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_le(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                      : "=wa" (vecu64)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                      : "=wa" (vecu64)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
+  return asm_swap_u64(vecu64);
+}
+
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 asm_vec_u64_load_be(unsigned int offset, const void *ptr)
 {
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
index a758e1ea..31ea25bf 100644
--- a/cipher/sha512-ppc.c
+++ b/cipher/sha512-ppc.c
@@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
 static ASM_FUNC_ATTR_INLINE vector2x_u64
 vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
 {
-  asm ("vshasigmad %0,%1,%2,%3"
-       : "=v" (v)
-       : "v" (v), "g" (a), "g" (b)
-       : "memory");
+  __asm__ ("vshasigmad %0,%1,%2,%3"
+           : "=v" (v)
+           : "v" (v), "g" (a), "g" (b)
+           : "memory");
   return v;
 }
 
 
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %x0,0,%1\n\t"
+             : "=wa" (vecu64)
+             : "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("lxvd2x %x0,%1,%2\n\t"
+             : "=wa" (vecu64)
+             : "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+  return vecu64;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+           : "=wa" (vecu64)
+           : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+             :
+             : "wa" (vecu64), "r" ((uintptr_t)ptr)
+             : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+             :
+             : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+             : "memory", "r0");
+}
+
+
 /* SHA2 round in vector registers */
 #define R(a,b,c,d,e,f,g,h,k,w) do \
   { \
@@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
   u64 w[16];
 
-  h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state);
+  h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
   h1 = vec_rol_elems (h0, 1);
-  h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state);
+  h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
   h3 = vec_rol_elems (h2, 1);
-  h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state);
+  h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
   h5 = vec_rol_elems (h4, 1);
-  h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state);
+  h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
   h7 = vec_rol_elems (h6, 1);
 
   while (nblks >= 2)
@@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8],
   h2 = vec_merge_idx0_elems (h2, h3);
   h4 = vec_merge_idx0_elems (h4, h5);
   h6 = vec_merge_idx0_elems (h6, h7);
-  vec_vsx_st (h0, 8 * 0, (unsigned long long *)state);
-  vec_vsx_st (h2, 8 * 2, (unsigned long long *)state);
-  vec_vsx_st (h4, 8 * 4, (unsigned long long *)state);
-  vec_vsx_st (h6, 8 * 6, (unsigned long long *)state);
+  vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
+  vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
+  vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
+  vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
 
   return sizeof(w);
 }
diff --git a/configure.ac b/configure.ac
index b6b6455a..be35ce42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1745,10 +1745,12 @@ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
       AC_COMPILE_IFELSE([AC_LANG_SOURCE(
       [[#include <altivec.h>
         typedef vector unsigned char block;
+        typedef vector unsigned int vecu32;
         block fn(block in)
         {
           block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-          return vec_cipher_be (t, in);
+          vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+          return vec_cipher_be (t, in) ^ (block)y;
         }
       ]])],
       [gcry_cv_cc_ppc_altivec=yes])
@@ -1769,10 +1771,12 @@ if test "$gcry_cv_cc_ppc_altivec" = "no" &&
       AC_COMPILE_IFELSE([AC_LANG_SOURCE(
       [[#include <altivec.h>
         typedef vector unsigned char block;
+        typedef vector unsigned int vecu32;
        block fn(block in)
         {
           block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
-          return vec_cipher_be (t, in);
+          vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+          return vec_cipher_be (t, in) ^ (block)y;
         }]])],
       [gcry_cv_cc_ppc_altivec_cflags=yes])])
 if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
diff --git a/configure.ac b/configure.ac
index 202ac888..fd447906 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2562,13 +2562,13 @@ if test "$found" = "1" ; then
          GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-intel-pclmul.lo"
       ;;
       powerpc64le-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc64-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
       powerpc-*-*)
-         GCRYPT_CIPHERS="$GCRYPT_CIPHERS crc-ppc.lo"
+         GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc-ppc.lo"
       ;;
    esac
 fi
@@ -2635,17 +2635,17 @@ if test "$found" = "1" ; then
       ;;
       powerpc64le-*-*)
         # Build with the crypto extension implementation
-        GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+        GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
      ;;
      powerpc64-*-*)
        # Big-Endian.
        # Build with the crypto extension implementation
-       GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
      ;;
      powerpc-*-*)
        # Big-Endian.
        # Build with the crypto extension implementation
-       GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha256-ppc.lo"
+       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256-ppc.lo"
    esac
 fi
 
@@ -2667,17 +2667,17 @@ if test "$found" = "1" ; then
      ;;
      powerpc64le-*-*)
        # Build with the crypto extension implementation
-       GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
      ;;
      powerpc64-*-*)
        # Big-Endian.
        # Build with the crypto extension implementation
-       GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
      ;;
      powerpc-*-*)
        # Big-Endian.
        # Build with the crypto extension implementation
-       GCRYPT_CIPHERS="$GCRYPT_CIPHERS sha512-ppc.lo"
+       GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512-ppc.lo"
    esac
 
    if test x"$neonsupport" = xyes ; then
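
Note (not part of the patch): the standalone sketch below illustrates the lxvd2x/stxvd2x load/store pattern that the patch introduces, for readers who want to try it outside libgcrypt. The names load_u64x2, store_u64x2 and the round-trip test in main() are invented for this example; the patch's real helpers are asm_vec_u64_load*, vec_u64_load and vec_u64_store above, which additionally special-case a compile-time constant zero offset. The sketch assumes GCC or Clang targeting POWER8 or newer with VSX enabled (-mvsx) and uses the compiler's __BYTE_ORDER__ macro where the patch uses the autoconf-provided WORDS_BIGENDIAN.

/* Illustrative sketch only: load and store a 2x64-bit vector with
 * lxvd2x/stxvd2x, swapping doublewords on little-endian targets so the
 * element order matches what the CRC and SHA-512 code expects. */
#include <altivec.h>
#include <stdint.h>
#include <stdio.h>

typedef vector unsigned long long vec_u64x2;

static inline vec_u64x2
load_u64x2 (unsigned long offset, const void *ptr)
{
  vec_u64x2 v;
  /* Clobbering r0 keeps the compiler from passing `offset` in r0, where
   * lxvd2x would read a literal zero instead of the register value. */
  __asm__ ("lxvd2x %x0,%1,%2"
           : "=wa" (v)
           : "r" (offset), "r" ((uintptr_t)ptr)
           : "memory", "r0");
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* lxvd2x loads the two doublewords in big-endian order; swap them back. */
  __asm__ ("xxswapd %x0,%x1" : "=wa" (v) : "wa" (v));
#endif
  return v;
}

static inline void
store_u64x2 (vec_u64x2 v, unsigned long offset, void *ptr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Undo the element swap before the doubleword-ordered store. */
  __asm__ ("xxswapd %x0,%x1" : "=wa" (v) : "wa" (v));
#endif
  __asm__ ("stxvd2x %x0,%1,%2"
           :
           : "wa" (v), "r" (offset), "r" ((uintptr_t)ptr)
           : "memory", "r0");
}

int
main (void)
{
  unsigned long long in[2] = { 0x1122334455667788ULL, 0x99aabbccddeeff00ULL };
  unsigned long long out[2] = { 0, 0 };

  /* Load then store: the values round-trip unchanged on both byte orders. */
  store_u64x2 (load_u64x2 (0, in), 0, out);
  printf ("%016llx %016llx\n", out[0], out[1]);
  return 0;
}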