Tree - rpms/libgcrypt - CentOS Git server

rpms / libgcrypt

Blame SOURCES/libgcrypt-1.8.5-ppc-aes-gcm.patch

Blob History Raw

		e0be8b	`diff --git a/AUTHORS b/AUTHORS`
		e0be8b	`index ee336b2e..77055c25 100644`
		e0be8b	`--- a/AUTHORS`
		e0be8b	`+++ b/AUTHORS`
		e0be8b	`@@ -29,6 +29,7 @@ List of Copyright holders`
		e0be8b	`Copyright (C) 1996-1999 Peter Gutmann, Paul Kendall, and Chris Wedgwood`
		e0be8b	`Copyright (C) 1996-2006 Peter Gutmann, Matt Thomlinson and Blake Coverett`
		e0be8b	`Copyright (C) 2003 Nikos Mavroyanopoulos`
		e0be8b	`+ Copyright (c) 2006 CRYPTOGAMS`
		e0be8b	`Copyright (C) 2006-2007 NTT (Nippon Telegraph and Telephone Corporation)`
		e0be8b	`Copyright (C) 2012-2019 g10 Code GmbH`
		e0be8b	`Copyright (C) 2012 Simon Josefsson, Niels Möller`
		e0be8b	`diff --git a/LICENSES b/LICENSES`
		e0be8b	`index f6733a69..c19284e2 100644`
		e0be8b	`--- a/LICENSES`
		e0be8b	`+++ b/LICENSES`
		e0be8b	`@@ -54,7 +54,6 @@ with any binary distributions derived from the GNU C Library.`
		e0be8b	`SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
		e0be8b	`#+end_quote`
		e0be8b
		e0be8b	`-`
		e0be8b	`For files:`
		e0be8b	`- random/jitterentropy-base.c`
		e0be8b	`- random/jitterentropy.h`
		e0be8b	`@@ -99,6 +98,48 @@ with any binary distributions derived from the GNU C Library.`
		e0be8b	`* DAMAGE.`
		e0be8b	`#+end_quote`
		e0be8b
		e0be8b	`+ For files:`
		e0be8b	`+ - cipher/cipher-gcm-ppc.c`
		e0be8b	`+`
		e0be8b	`+#+begin_quote`
		e0be8b	`+ Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>`
		e0be8b	`+ All rights reserved.`
		e0be8b	`+`
		e0be8b	`+ Redistribution and use in source and binary forms, with or without`
		e0be8b	`+ modification, are permitted provided that the following conditions`
		e0be8b	`+ are met:`
		e0be8b	`+`
		e0be8b	`+ * Redistributions of source code must retain copyright notices,`
		e0be8b	`+ this list of conditions and the following disclaimer.`
		e0be8b	`+`
		e0be8b	`+ * Redistributions in binary form must reproduce the above`
		e0be8b	`+ copyright notice, this list of conditions and the following`
		e0be8b	`+ disclaimer in the documentation and/or other materials`
		e0be8b	`+ provided with the distribution.`
		e0be8b	`+`
		e0be8b	`+ * Neither the name of the CRYPTOGAMS nor the names of its`
		e0be8b	`+ copyright holder and contributors may be used to endorse or`
		e0be8b	`+ promote products derived from this software without specific`
		e0be8b	`+ prior written permission.`
		e0be8b	`+`
		e0be8b	`+ ALTERNATIVELY, provided that this notice is retained in full, this`
		e0be8b	`+ product may be distributed under the terms of the GNU General Public`
		e0be8b	`+ License (GPL), in which case the provisions of the GPL apply INSTEAD OF`
		e0be8b	`+ those given above.`
		e0be8b	`+`
		e0be8b	`+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS`
		e0be8b	`+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
		e0be8b	`+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
		e0be8b	`+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
		e0be8b	`+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
		e0be8b	`+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
		e0be8b	`+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
		e0be8b	`+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
		e0be8b	`+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
		e0be8b	`+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
		e0be8b	`+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
		e0be8b	`+#+end_quote`
		e0be8b	`+`
		e0be8b	`* X License`
		e0be8b
		e0be8b	`For files:`
		e0be8b	`diff --git a/cipher/Makefile.am b/cipher/Makefile.am`
		e0be8b	`index 1728e9f9..ab5d2a38 100644`
		e0be8b	`--- a/cipher/Makefile.am`
		e0be8b	`+++ b/cipher/Makefile.am`
		e0be8b	`@@ -66,6 +66,7 @@ blowfish.c blowfish-amd64.S blowfish-arm.S \`
		e0be8b	`cast5.c cast5-amd64.S cast5-arm.S \`
		e0be8b	`chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \`
		e0be8b	`chacha20-armv7-neon.S \`
		e0be8b	`+cipher-gcm-ppc.c \`
		e0be8b	`crc.c \`
		e0be8b	`crc-intel-pclmul.c crc-ppc.c \`
		e0be8b	`des.c des-amd64.S \`
		e0be8b	`@@ -165,3 +166,9 @@ crc-ppc.o: $(srcdir)/crc-ppc.c Makefile`
		e0be8b
		e0be8b	`crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile`
		e0be8b	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
		e0be8b	`+`
		e0be8b	`+cipher-gcm-ppc.o: $(srcdir)/cipher-gcm-ppc.c Makefile`
		e0be8b	+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< `
		e0be8b	`+`
		e0be8b	`+cipher-gcm-ppc.lo: $(srcdir)/cipher-gcm-ppc.c Makefile`
		e0be8b	+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
		e0be8b	`diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c`
		e0be8b	`new file mode 100644`
		e0be8b	`index 00000000..ed27ef15`
		e0be8b	`--- /dev/null`
		e0be8b	`+++ b/cipher/cipher-gcm-ppc.c`
		e0be8b	`@@ -0,0 +1,510 @@`
		e0be8b	`+/* cipher-gcm-ppc.c - Power 8 vpmsum accelerated Galois Counter Mode`
		e0be8b	`+ * implementation`
		e0be8b	`+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>`
		e0be8b	`+ *`
		e0be8b	`+ * This file is part of Libgcrypt.`
		e0be8b	`+ *`
		e0be8b	`+ * Libgcrypt is free software; you can redistribute it and/or modify`
		e0be8b	`+ * it under the terms of the GNU Lesser General Public License as`
		e0be8b	`+ * published by the Free Software Foundation; either version 2.1 of`
		e0be8b	`+ * the License, or (at your option) any later version.`
		e0be8b	`+ *`
		e0be8b	`+ * Libgcrypt is distributed in the hope that it will be useful,`
		e0be8b	`+ * but WITHOUT ANY WARRANTY; without even the implied warranty of`
		e0be8b	`+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
		e0be8b	`+ * GNU Lesser General Public License for more details.`
		e0be8b	`+ *`
		e0be8b	`+ * You should have received a copy of the GNU Lesser General Public`
		e0be8b	`+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.`
		e0be8b	`+ *`
		e0be8b	`+ * Based on GHASH implementation by Andy Polyakov from CRYPTOGAMS`
		e0be8b	`+ * distribution (ppc/ghashp8-ppc.pl). Specifically, it uses his register`
		e0be8b	`+ * allocation (which then defers to your compiler's register allocation),`
		e0be8b	`+ * instead of re-implementing Gerald Estrin's Scheme of parallelized`
		e0be8b	`+ * multiplication of polynomials, as I did not understand this algorithm at`
		e0be8b	`+ * the time.`
		e0be8b	`+ *`
		e0be8b	`+ * Original copyright license follows:`
		e0be8b	`+ *`
		e0be8b	`+ * Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>`
		e0be8b	`+ * All rights reserved.`
		e0be8b	`+ *`
		e0be8b	`+ * Redistribution and use in source and binary forms, with or without`
		e0be8b	`+ * modification, are permitted provided that the following conditions`
		e0be8b	`+ * are met:`
		e0be8b	`+ *`
		e0be8b	`+ * * Redistributions of source code must retain copyright notices,`
		e0be8b	`+ * this list of conditions and the following disclaimer.`
		e0be8b	`+ *`
		e0be8b	`+ * * Redistributions in binary form must reproduce the above`
		e0be8b	`+ * copyright notice, this list of conditions and the following`
		e0be8b	`+ * disclaimer in the documentation and/or other materials`
		e0be8b	`+ * provided with the distribution.`
		e0be8b	`+ *`
		e0be8b	`+ * * Neither the name of the CRYPTOGAMS nor the names of its`
		e0be8b	`+ * copyright holder and contributors may be used to endorse or`
		e0be8b	`+ * promote products derived from this software without specific`
		e0be8b	`+ * prior written permission.`
		e0be8b	`+ *`
		e0be8b	`+ * ALTERNATIVELY, provided that this notice is retained in full, this`
		e0be8b	`+ * product may be distributed under the terms of the GNU General Public`
		e0be8b	`+ * License (GPL), in which case the provisions of the GPL apply INSTEAD OF`
		e0be8b	`+ * those given above.`
		e0be8b	`+ *`
		e0be8b	`+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS`
		e0be8b	`+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
		e0be8b	`+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
		e0be8b	`+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
		e0be8b	`+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
		e0be8b	`+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
		e0be8b	`+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
		e0be8b	`+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
		e0be8b	`+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
		e0be8b	`+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
		e0be8b	`+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
		e0be8b	`+ *`
		e0be8b	`+ * SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)`
		e0be8b	`+ */`
		e0be8b	`+`
		e0be8b	`+#include <config.h>`
		e0be8b	`+#include <stdio.h>`
		e0be8b	`+#include <stdlib.h>`
		e0be8b	`+#include <string.h>`
		e0be8b	`+#include <errno.h>`
		e0be8b	`+#include <stdint.h>`
		e0be8b	`+`
		e0be8b	`+#include "g10lib.h"`
		e0be8b	`+#include "cipher.h"`
		e0be8b	`+#include "bufhelp.h"`
		e0be8b	`+#include "./cipher-internal.h"`
		e0be8b	`+`
		e0be8b	`+#ifdef GCM_USE_PPC_VPMSUM`
		e0be8b	`+`
		e0be8b	`+#include <altivec.h>`
		e0be8b	`+`
		e0be8b	`+#define ALWAYS_INLINE inline __attribute__((always_inline))`
		e0be8b	`+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))`
		e0be8b	`+`
		e0be8b	`+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION`
		e0be8b	`+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE`
		e0be8b	`+`
		e0be8b	`+typedef vector unsigned char vector16x_u8;`
		e0be8b	`+typedef vector signed char vector16x_s8;`
		e0be8b	`+typedef vector unsigned long long vector2x_u64;`
		e0be8b	`+typedef vector unsigned long long block;`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_vpmsumd(block a, block b)`
		e0be8b	`+{`
		e0be8b	`+ block r;`
		e0be8b	`+ __asm__("vpmsumd %0, %1, %2"`
		e0be8b	`+ : "=v" (r)`
		e0be8b	`+ : "v" (a), "v" (b));`
		e0be8b	`+ return r;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_swap_u64(block a)`
		e0be8b	`+{`
		e0be8b	`+ __asm__("xxswapd %x0, %x1"`
		e0be8b	`+ : "=wa" (a)`
		e0be8b	`+ : "wa" (a));`
		e0be8b	`+ return a;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_rot_block_left(block a)`
		e0be8b	`+{`
		e0be8b	`+ block zero = {0, 0};`
		e0be8b	`+ block mask = {2, 0};`
		e0be8b	`+ return __builtin_shuffle(a, zero, mask);`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_rot_block_right(block a)`
		e0be8b	`+{`
		e0be8b	`+ block zero = {0, 0};`
		e0be8b	`+ block mask = {1, 2};`
		e0be8b	`+ return __builtin_shuffle(a, zero, mask);`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+/* vsl is a slightly strange function in the way the shift is passed... */`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_ashl_128(block a, vector16x_u8 shift)`
		e0be8b	`+{`
		e0be8b	`+ block r;`
		e0be8b	`+ __asm__("vsl %0, %1, %2"`
		e0be8b	`+ : "=v" (r)`
		e0be8b	`+ : "v" (a), "v" (shift));`
		e0be8b	`+ return r;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+#define ALIGNED_LOAD(in_ptr) \`
		e0be8b	`+ (vec_aligned_ld (0, (const unsigned char *)(in_ptr)))`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+vec_aligned_ld(unsigned long offset, const unsigned char *ptr)`
		e0be8b	`+{`
		e0be8b	`+#ifndef WORDS_BIGENDIAN`
		e0be8b	`+ block vec;`
		e0be8b	`+ __asm__ ("lvx %0,%1,%2\n\t"`
		e0be8b	`+ : "=v" (vec)`
		e0be8b	`+ : "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`+ return vec;`
		e0be8b	`+#else`
		e0be8b	`+ return vec_vsx_ld (offset, ptr);`
		e0be8b	`+#endif`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+#define STORE_TABLE(gcm_table, slot, vec) \`
		e0be8b	`+ vec_aligned_st (((block)vec), slot * 16, (unsigned char *)(gcm_table));`
		e0be8b	`+`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE void`
		e0be8b	`+vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr)`
		e0be8b	`+{`
		e0be8b	`+#ifndef WORDS_BIGENDIAN`
		e0be8b	`+ __asm__ ("stvx %0,%1,%2\n\t"`
		e0be8b	`+ :`
		e0be8b	`+ : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`+#else`
		e0be8b	`+ vec_vsx_st ((vector16x_u8)vec, offset, ptr);`
		e0be8b	`+#endif`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+#define VEC_LOAD_BE(in_ptr, bswap_const) \`
		e0be8b	`+ (vec_load_be (0, (const unsigned char *)(in_ptr), bswap_const))`
		e0be8b	`+`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+vec_load_be(unsigned long offset, const unsigned char *ptr,`
		e0be8b	`+ vector unsigned char be_bswap_const)`
		e0be8b	`+{`
		e0be8b	`+#ifndef WORDS_BIGENDIAN`
		e0be8b	`+ block vec;`
		e0be8b	`+ /* GCC vec_vsx_ld is generating two instructions on little-endian. Use`
		e0be8b	`+ * lxvw4x directly instead. */`
		e0be8b	`+ __asm__ ("lxvw4x %x0,%1,%2\n\t"`
		e0be8b	`+ : "=wa" (vec)`
		e0be8b	`+ : "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`+ __asm__ ("vperm %0,%1,%1,%2\n\t"`
		e0be8b	`+ : "=v" (vec)`
		e0be8b	`+ : "v" (vec), "v" (be_bswap_const));`
		e0be8b	`+ return vec;`
		e0be8b	`+#else`
		e0be8b	`+ (void)be_bswap_const;`
		e0be8b	`+ return vec_vsx_ld (offset, ptr);`
		e0be8b	`+#endif`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+/* Power ghash based on papers:`
		e0be8b	`+ "The Galois/Counter Mode of Operation (GCM)"; David A. McGrew, John Viega`
		e0be8b	`+ "Intel® Carry-Less Multiplication Instruction and its Usage for Computing`
		e0be8b	`+ the GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis.`
		e0be8b	`+`
		e0be8b	`+ After saving the magic c2 constant and pre-formatted version of the key,`
		e0be8b	`+ we pre-process the key for parallel hashing. This takes advantage of the`
		e0be8b	`+ identity of addition over a galois field being identical to XOR, and thus`
		e0be8b	`+ can be parallelized (S 2.2, page 3). We multiply and add (galois field`
		e0be8b	`+ versions) the key over multiple iterations and save the result. This can`
		e0be8b	`+ later be galois added (XORed) with parallel processed input (Estrin's`
		e0be8b	`+ Scheme).`
		e0be8b	`+`
		e0be8b	`+ The ghash "key" is a salt. */`
		e0be8b	`+void ASM_FUNC_ATTR`
		e0be8b	`+_gcry_ghash_setup_ppc_vpmsum (uint64_t gcm_table, void gcm_key)`
		e0be8b	`+{`
		e0be8b	`+ vector16x_u8 bswap_const =`
		e0be8b	`+ { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };`
		e0be8b	`+ vector16x_u8 c2 =`
		e0be8b	`+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0b11000010 };`
		e0be8b	`+ block T0, T1, T2;`
		e0be8b	`+ block C2, H, H1, H1l, H1h, H2, H2l, H2h;`
		e0be8b	`+ block H3l, H3, H3h, H4l, H4, H4h, T3, T4;`
		e0be8b	`+ vector16x_s8 most_sig_of_H, t7, carry;`
		e0be8b	`+ vector16x_u8 one = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };`
		e0be8b	`+`
		e0be8b	`+ H = VEC_LOAD_BE(gcm_key, bswap_const);`
		e0be8b	`+ most_sig_of_H = vec_splat((vector16x_s8)H, 15);`
		e0be8b	`+ t7 = vec_splat_s8(7);`
		e0be8b	`+ carry = most_sig_of_H >> t7;`
		e0be8b	`+ carry &= c2; /* only interested in certain carries. */`
		e0be8b	`+ H1 = asm_ashl_128(H, one);`
		e0be8b	`+ H1 ^= (block)carry; /* complete the <<< 1 */`
		e0be8b	`+`
		e0be8b	`+ T1 = asm_swap_u64 (H1);`
		e0be8b	`+ H1l = asm_rot_block_right (T1);`
		e0be8b	`+ H1h = asm_rot_block_left (T1);`
		e0be8b	`+ C2 = asm_rot_block_right ((block)c2);`
		e0be8b	`+`
		e0be8b	`+ STORE_TABLE (gcm_table, 0, C2);`
		e0be8b	`+ STORE_TABLE (gcm_table, 1, H1l);`
		e0be8b	`+ STORE_TABLE (gcm_table, 2, T1);`
		e0be8b	`+ STORE_TABLE (gcm_table, 3, H1h);`
		e0be8b	`+`
		e0be8b	`+ /* pre-process coefficients for Gerald Estrin's scheme for parallel`
		e0be8b	`+ * multiplication of polynomials`
		e0be8b	`+ */`
		e0be8b	`+ H2l = asm_vpmsumd (H1l, H1); /* do not need to mask in`
		e0be8b	`+ because 0 * anything -> 0 */`
		e0be8b	`+ H2 = asm_vpmsumd (T1, H1);`
		e0be8b	`+ H2h = asm_vpmsumd (H1h, H1);`
		e0be8b	`+`
		e0be8b	`+ /* reduce 1 */`
		e0be8b	`+ T0 = asm_vpmsumd (H2l, C2);`
		e0be8b	`+`
		e0be8b	`+ H2l ^= asm_rot_block_left (H2);;`
		e0be8b	`+ H2h ^= asm_rot_block_right (H2);`
		e0be8b	`+ H2l = asm_swap_u64 (H2l);`
		e0be8b	`+ H2l ^= T0;`
		e0be8b	`+ /* reduce 2 */`
		e0be8b	`+ T0 = asm_swap_u64 (H2l);`
		e0be8b	`+ H2l = asm_vpmsumd (H2l, C2);`
		e0be8b	`+ H2 = H2l ^ H2h ^ T0;`
		e0be8b	`+`
		e0be8b	`+ T2 = asm_swap_u64 (H2);`
		e0be8b	`+ H2l = asm_rot_block_right (T2);`
		e0be8b	`+ H2h = asm_rot_block_left (T2);`
		e0be8b	`+`
		e0be8b	`+ STORE_TABLE (gcm_table, 4, H2l);`
		e0be8b	`+ STORE_TABLE (gcm_table, 5, T2);`
		e0be8b	`+ STORE_TABLE (gcm_table, 6, H2h);`
		e0be8b	`+`
		e0be8b	`+ H3l = asm_vpmsumd (H2l, H1);`
		e0be8b	`+ H4l = asm_vpmsumd (H2l, H2);`
		e0be8b	`+ H3 = asm_vpmsumd (T2, H1);`
		e0be8b	`+ H4 = asm_vpmsumd (T2, H2);`
		e0be8b	`+ H3h = asm_vpmsumd (H2h, H1);`
		e0be8b	`+ H4h = asm_vpmsumd (H2h, H2);`
		e0be8b	`+`
		e0be8b	`+ T3 = asm_vpmsumd (H3l, C2);`
		e0be8b	`+ T4 = asm_vpmsumd (H4l, C2);`
		e0be8b	`+`
		e0be8b	`+ H3l ^= asm_rot_block_left (H3);`
		e0be8b	`+ H3h ^= asm_rot_block_right (H3);`
		e0be8b	`+ H4l ^= asm_rot_block_left (H4);`
		e0be8b	`+ H4h ^= asm_rot_block_right (H4);`
		e0be8b	`+`
		e0be8b	`+ H3 = asm_swap_u64 (H3l);`
		e0be8b	`+ H4 = asm_swap_u64 (H4l);`
		e0be8b	`+`
		e0be8b	`+ H3 ^= T3;`
		e0be8b	`+ H4 ^= T4;`
		e0be8b	`+`
		e0be8b	`+ /* We could have also b64 switched reduce and reduce2, however as we are`
		e0be8b	`+ using the unrotated H and H2 above to vpmsum, this is marginally better. */`
		e0be8b	`+ T3 = asm_swap_u64 (H3);`
		e0be8b	`+ T4 = asm_swap_u64 (H4);`
		e0be8b	`+`
		e0be8b	`+ H3 = asm_vpmsumd (H3, C2);`
		e0be8b	`+ H4 = asm_vpmsumd (H4, C2);`
		e0be8b	`+`
		e0be8b	`+ T3 ^= H3h;`
		e0be8b	`+ T4 ^= H4h;`
		e0be8b	`+ H3 ^= T3;`
		e0be8b	`+ H4 ^= T4;`
		e0be8b	`+ H3 = asm_swap_u64 (H3);`
		e0be8b	`+ H4 = asm_swap_u64 (H4);`
		e0be8b	`+`
		e0be8b	`+ H3l = asm_rot_block_right (H3);`
		e0be8b	`+ H3h = asm_rot_block_left (H3);`
		e0be8b	`+ H4l = asm_rot_block_right (H4);`
		e0be8b	`+ H4h = asm_rot_block_left (H4);`
		e0be8b	`+`
		e0be8b	`+ STORE_TABLE (gcm_table, 7, H3l);`
		e0be8b	`+ STORE_TABLE (gcm_table, 8, H3);`
		e0be8b	`+ STORE_TABLE (gcm_table, 9, H3h);`
		e0be8b	`+ STORE_TABLE (gcm_table, 10, H4l);`
		e0be8b	`+ STORE_TABLE (gcm_table, 11, H4);`
		e0be8b	`+ STORE_TABLE (gcm_table, 12, H4h);`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+ASM_FUNC_ATTR_INLINE`
		e0be8b	`+block`
		e0be8b	`+vec_perm2(block l, block r, vector16x_u8 perm) {`
		e0be8b	`+ block ret;`
		e0be8b	`+ __asm__ ("vperm %0,%1,%2,%3\n\t"`
		e0be8b	`+ : "=v" (ret)`
		e0be8b	`+ : "v" (l), "v" (r), "v" (perm));`
		e0be8b	`+ return ret;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+void ASM_FUNC_ATTR`
		e0be8b	`+_gcry_ghash_ppc_vpmsum (const byte result, const void const gcm_table,`
		e0be8b	`+ const byte *const buf, const size_t nblocks)`
		e0be8b	`+{`
		e0be8b	`+ /* This const is strange, it is reversing the bytes, and also reversing`
		e0be8b	`+ the u32s that get switched by lxvw4x and it also addresses bytes big-endian,`
		e0be8b	`+ and is here due to lack of proper peep-hole optimization. */`
		e0be8b	`+ vector16x_u8 bswap_const =`
		e0be8b	`+ { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };`
		e0be8b	`+ vector16x_u8 bswap_8_const =`
		e0be8b	`+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };`
		e0be8b	`+ block c2, H0l, H0m, H0h, H4l, H4m, H4h, H2m, H3l, H3m, H3h, Hl;`
		e0be8b	`+ block Hm, Hh, in, in0, in1, in2, in3, Hm_right, Hl_rotate, cur;`
		e0be8b	`+ size_t blocks_remaining = nblocks, off = 0;`
		e0be8b	`+ size_t not_multiple_of_four;`
		e0be8b	`+ block t0;`
		e0be8b	`+`
		e0be8b	`+ cur = vec_load_be (0, result, bswap_const);`
		e0be8b	`+`
		e0be8b	`+ c2 = vec_aligned_ld (0, gcm_table);`
		e0be8b	`+ H0l = vec_aligned_ld (16, gcm_table);`
		e0be8b	`+ H0m = vec_aligned_ld (32, gcm_table);`
		e0be8b	`+ H0h = vec_aligned_ld (48, gcm_table);`
		e0be8b	`+`
		e0be8b	`+ for (not_multiple_of_four = nblocks % 4; not_multiple_of_four;`
		e0be8b	`+ not_multiple_of_four--)`
		e0be8b	`+ {`
		e0be8b	`+ in = vec_load_be (off, buf, bswap_const);`
		e0be8b	`+ off += 16;`
		e0be8b	`+ blocks_remaining--;`
		e0be8b	`+ cur ^= in;`
		e0be8b	`+`
		e0be8b	`+ Hl = asm_vpmsumd (cur, H0l);`
		e0be8b	`+ Hm = asm_vpmsumd (cur, H0m);`
		e0be8b	`+ Hh = asm_vpmsumd (cur, H0h);`
		e0be8b	`+`
		e0be8b	`+ t0 = asm_vpmsumd (Hl, c2);`
		e0be8b	`+`
		e0be8b	`+ Hl ^= asm_rot_block_left (Hm);`
		e0be8b	`+`
		e0be8b	`+ Hm_right = asm_rot_block_right (Hm);`
		e0be8b	`+ Hh ^= Hm_right;`
		e0be8b	`+ Hl_rotate = asm_swap_u64 (Hl);`
		e0be8b	`+ Hl_rotate ^= t0;`
		e0be8b	`+ Hl = asm_swap_u64 (Hl_rotate);`
		e0be8b	`+ Hl_rotate = asm_vpmsumd (Hl_rotate, c2);`
		e0be8b	`+ Hl ^= Hh;`
		e0be8b	`+ Hl ^= Hl_rotate;`
		e0be8b	`+`
		e0be8b	`+ cur = Hl;`
		e0be8b	`+ }`
		e0be8b	`+`
		e0be8b	`+ if (blocks_remaining > 0)`
		e0be8b	`+ {`
		e0be8b	`+ vector16x_u8 hiperm =`
		e0be8b	`+ {`
		e0be8b	`+ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,`
		e0be8b	`+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0`
		e0be8b	`+ };`
		e0be8b	`+ vector16x_u8 loperm =`
		e0be8b	`+ {`
		e0be8b	`+ 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,`
		e0be8b	`+ 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8`
		e0be8b	`+ };`
		e0be8b	`+ block Xl, Xm, Xh, Xl1, Xm1, Xh1, Xm2, Xl3, Xm3, Xh3, Xl_rotate;`
		e0be8b	`+ block H21l, H21h, merge_l, merge_h;`
		e0be8b	`+`
		e0be8b	`+ H2m = vec_aligned_ld (48 + 32, gcm_table);`
		e0be8b	`+ H3l = vec_aligned_ld (48 * 2 + 16, gcm_table);`
		e0be8b	`+ H3m = vec_aligned_ld (48 * 2 + 32, gcm_table);`
		e0be8b	`+ H3h = vec_aligned_ld (48 * 2 + 48, gcm_table);`
		e0be8b	`+ H4l = vec_aligned_ld (48 * 3 + 16, gcm_table);`
		e0be8b	`+ H4m = vec_aligned_ld (48 * 3 + 32, gcm_table);`
		e0be8b	`+ H4h = vec_aligned_ld (48 * 3 + 48, gcm_table);`
		e0be8b	`+`
		e0be8b	`+ in0 = vec_load_be (off, buf, bswap_const);`
		e0be8b	`+ in1 = vec_load_be (off + 16, buf, bswap_const);`
		e0be8b	`+ in2 = vec_load_be (off + 32, buf, bswap_const);`
		e0be8b	`+ in3 = vec_load_be (off + 48, buf, bswap_const);`
		e0be8b	`+ blocks_remaining -= 4;`
		e0be8b	`+ off += 64;`
		e0be8b	`+`
		e0be8b	`+ Xh = in0 ^ cur;`
		e0be8b	`+`
		e0be8b	`+ Xl1 = asm_vpmsumd (in1, H3l);`
		e0be8b	`+ Xm1 = asm_vpmsumd (in1, H3m);`
		e0be8b	`+ Xh1 = asm_vpmsumd (in1, H3h);`
		e0be8b	`+`
		e0be8b	`+ H21l = vec_perm2 (H2m, H0m, hiperm);`
		e0be8b	`+ H21h = vec_perm2 (H2m, H0m, loperm);`
		e0be8b	`+ merge_l = vec_perm2 (in2, in3, loperm);`
		e0be8b	`+ merge_h = vec_perm2 (in2, in3, hiperm);`
		e0be8b	`+`
		e0be8b	`+ Xm2 = asm_vpmsumd (in2, H2m);`
		e0be8b	`+ Xl3 = asm_vpmsumd (merge_l, H21l);`
		e0be8b	`+ Xm3 = asm_vpmsumd (in3, H0m);`
		e0be8b	`+ Xh3 = asm_vpmsumd (merge_h, H21h);`
		e0be8b	`+`
		e0be8b	`+ Xm2 ^= Xm1;`
		e0be8b	`+ Xl3 ^= Xl1;`
		e0be8b	`+ Xm3 ^= Xm2;`
		e0be8b	`+ Xh3 ^= Xh1;`
		e0be8b	`+`
		e0be8b	`+ /* Gerald Estrin's scheme for parallel multiplication of polynomials */`
		e0be8b	`+ for (;blocks_remaining > 0; blocks_remaining -= 4, off += 64)`
		e0be8b	`+ {`
		e0be8b	`+ in0 = vec_load_be (off, buf, bswap_const);`
		e0be8b	`+ in1 = vec_load_be (off + 16, buf, bswap_const);`
		e0be8b	`+ in2 = vec_load_be (off + 32, buf, bswap_const);`
		e0be8b	`+ in3 = vec_load_be (off + 48, buf, bswap_const);`
		e0be8b	`+`
		e0be8b	`+ Xl = asm_vpmsumd (Xh, H4l);`
		e0be8b	`+ Xm = asm_vpmsumd (Xh, H4m);`
		e0be8b	`+ Xh = asm_vpmsumd (Xh, H4h);`
		e0be8b	`+ Xl1 = asm_vpmsumd (in1, H3l);`
		e0be8b	`+ Xm1 = asm_vpmsumd (in1, H3m);`
		e0be8b	`+ Xh1 = asm_vpmsumd (in1, H3h);`
		e0be8b	`+`
		e0be8b	`+ Xl ^= Xl3;`
		e0be8b	`+ Xm ^= Xm3;`
		e0be8b	`+ Xh ^= Xh3;`
		e0be8b	`+ merge_l = vec_perm2 (in2, in3, loperm);`
		e0be8b	`+ merge_h = vec_perm2 (in2, in3, hiperm);`
		e0be8b	`+`
		e0be8b	`+ t0 = asm_vpmsumd (Xl, c2);`
		e0be8b	`+ Xl3 = asm_vpmsumd (merge_l, H21l);`
		e0be8b	`+ Xh3 = asm_vpmsumd (merge_h, H21h);`
		e0be8b	`+`
		e0be8b	`+ Xl ^= asm_rot_block_left (Xm);`
		e0be8b	`+ Xh ^= asm_rot_block_right (Xm);`
		e0be8b	`+`
		e0be8b	`+ Xl = asm_swap_u64 (Xl);`
		e0be8b	`+ Xl ^= t0;`
		e0be8b	`+`
		e0be8b	`+ Xl_rotate = asm_swap_u64 (Xl);`
		e0be8b	`+ Xm2 = asm_vpmsumd (in2, H2m);`
		e0be8b	`+ Xm3 = asm_vpmsumd (in3, H0m);`
		e0be8b	`+ Xl = asm_vpmsumd (Xl, c2);`
		e0be8b	`+`
		e0be8b	`+ Xl3 ^= Xl1;`
		e0be8b	`+ Xh3 ^= Xh1;`
		e0be8b	`+ Xh ^= in0;`
		e0be8b	`+ Xm2 ^= Xm1;`
		e0be8b	`+ Xh ^= Xl_rotate;`
		e0be8b	`+ Xm3 ^= Xm2;`
		e0be8b	`+ Xh ^= Xl;`
		e0be8b	`+ }`
		e0be8b	`+`
		e0be8b	`+ Xl = asm_vpmsumd (Xh, H4l);`
		e0be8b	`+ Xm = asm_vpmsumd (Xh, H4m);`
		e0be8b	`+ Xh = asm_vpmsumd (Xh, H4h);`
		e0be8b	`+`
		e0be8b	`+ Xl ^= Xl3;`
		e0be8b	`+ Xm ^= Xm3;`
		e0be8b	`+`
		e0be8b	`+ t0 = asm_vpmsumd (Xl, c2);`
		e0be8b	`+`
		e0be8b	`+ Xh ^= Xh3;`
		e0be8b	`+ Xl ^= asm_rot_block_left (Xm);`
		e0be8b	`+ Xh ^= asm_rot_block_right (Xm);`
		e0be8b	`+`
		e0be8b	`+ Xl = asm_swap_u64 (Xl);`
		e0be8b	`+ Xl ^= t0;`
		e0be8b	`+`
		e0be8b	`+ Xl_rotate = asm_swap_u64 (Xl);`
		e0be8b	`+ Xl = asm_vpmsumd (Xl, c2);`
		e0be8b	`+ Xl_rotate ^= Xh;`
		e0be8b	`+ Xl ^= Xl_rotate;`
		e0be8b	`+`
		e0be8b	`+ cur = Xl;`
		e0be8b	`+ }`
		e0be8b	`+`
		e0be8b	`+ cur = (block)vec_perm ((vector16x_u8)cur, (vector16x_u8)cur, bswap_8_const);`
		e0be8b	`+ STORE_TABLE (result, 0, cur);`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+#endif /* GCM_USE_PPC_VPMSUM */`
		e0be8b	`diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c`
		e0be8b	`index 32ec9fa0..b84a0698 100644`
		e0be8b	`--- a/cipher/cipher-gcm.c`
		e0be8b	`+++ b/cipher/cipher-gcm.c`
		e0be8b	`@@ -61,6 +61,28 @@ ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte result, const byte buf,`
		e0be8b
		e0be8b	`#endif`
		e0be8b
		e0be8b	`+#ifdef GCM_USE_PPC_VPMSUM`
		e0be8b	`+extern void _gcry_ghash_setup_ppc_vpmsum (void gcm_table, void gcm_key);`
		e0be8b	`+`
		e0be8b	`+/* result is 128-bits */`
		e0be8b	`+extern unsigned int _gcry_ghash_ppc_vpmsum (byte result, void gcm_table,`
		e0be8b	`+ const byte *buf, size_t nblocks);`
		e0be8b	`+`
		e0be8b	`+static void`
		e0be8b	`+ghash_setup_ppc_vpmsum (gcry_cipher_hd_t c)`
		e0be8b	`+{`
		e0be8b	`+ _gcry_ghash_setup_ppc_vpmsum(c->u_mode.gcm.gcm_table, c->u_mode.gcm.u_ghash_key.key);`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+static unsigned int`
		e0be8b	`+ghash_ppc_vpmsum (gcry_cipher_hd_t c, byte result, const byte buf,`
		e0be8b	`+ size_t nblocks)`
		e0be8b	`+{`
		e0be8b	`+ _gcry_ghash_ppc_vpmsum(result, c->u_mode.gcm.gcm_table, buf,`
		e0be8b	`+ nblocks);`
		e0be8b	`+ return 0;`
		e0be8b	`+}`
		e0be8b	`+#endif /* GCM_USE_PPC_VPMSUM */`
		e0be8b
		e0be8b	`#ifdef GCM_USE_TABLES`
		e0be8b	`static const u16 gcmR[256] = {`
		e0be8b	`@@ -403,7 +425,8 @@ ghash_internal (gcry_cipher_hd_t c, byte result, const byte buf,`
		e0be8b	`static void`
		e0be8b	`setupM (gcry_cipher_hd_t c)`
		e0be8b	`{`
		e0be8b	`-#if defined(GCM_USE_INTEL_PCLMUL) \|\| defined(GCM_USE_ARM_PMULL)`
		e0be8b	`+#if defined(GCM_USE_INTEL_PCLMUL) \|\| defined(GCM_USE_ARM_PMULL) \|\| \`
		e0be8b	`+ defined(GCM_USE_S390X_CRYPTO) \|\| defined(GCM_USE_PPC_VPMSUM)`
		e0be8b	`unsigned int features = _gcry_get_hw_features ();`
		e0be8b	`#endif`
		e0be8b
		e0be8b	`@@ -423,7 +446,24 @@ setupM (gcry_cipher_hd_t c)`
		e0be8b	`ghash_setup_armv8_ce_pmull (c);`
		e0be8b	`}`
		e0be8b	`#endif`
		e0be8b	`- else`
		e0be8b	`+#ifdef GCM_USE_PPC_VPMSUM`
		e0be8b	`+ else if (features & HWF_PPC_VCRYPTO)`
		e0be8b	`+ {`
		e0be8b	`+ c->u_mode.gcm.ghash_fn = ghash_ppc_vpmsum;`
		e0be8b	`+ ghash_setup_ppc_vpmsum (c);`
		e0be8b	`+ }`
		e0be8b	`+#endif`
		e0be8b	`+#ifdef GCM_USE_S390X_CRYPTO`
		e0be8b	`+ else if (features & HWF_S390X_MSA)`
		e0be8b	`+ {`
		e0be8b	`+ if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))`
		e0be8b	`+ {`
		e0be8b	`+ c->u_mode.gcm.ghash_fn = ghash_s390x_kimd;`
		e0be8b	`+ }`
		e0be8b	`+ }`
		e0be8b	`+#endif`
		e0be8b	`+`
		e0be8b	`+ if (c->u_mode.gcm.ghash_fn == NULL)`
		e0be8b	`{`
		e0be8b	`c->u_mode.gcm.ghash_fn = ghash_internal;`
		e0be8b	`fillM (c);`
		e0be8b	`diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h`
		e0be8b	`index a95e084b..a5fd3097 100644`
		e0be8b	`--- a/cipher/cipher-internal.h`
		e0be8b	`+++ b/cipher/cipher-internal.h`
		e0be8b	`@@ -87,6 +87,18 @@`
		e0be8b	`#endif /* GCM_USE_ARM_PMULL */`
		e0be8b
		e0be8b
		e0be8b	`+/* GCM_USE_PPC_VPMSUM indicates whether to compile GCM with PPC Power 8`
		e0be8b	`+ * polynomial multiplication instruction. */`
		e0be8b	`+#undef GCM_USE_PPC_VPMSUM`
		e0be8b	`+#if defined(GCM_USE_TABLES)`
		e0be8b	`+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && defined(__powerpc64__) && \`
		e0be8b	`+ !defined(WORDS_BIGENDIAN) && defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \`
		e0be8b	`+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && __GNUC__ >= 4`
		e0be8b	`+# define GCM_USE_PPC_VPMSUM 1`
		e0be8b	`+# define NEED_16BYTE_ALIGNED_CONTEXT 1 /* this also aligns gcm_table */`
		e0be8b	`+#endif`
		e0be8b	`+#endif /* GCM_USE_PPC_VPMSUM */`
		e0be8b	`+`
		e0be8b	`typedef unsigned int (ghash_fn_t) (gcry_cipher_hd_t c, byte result,`
		e0be8b	`const byte *buf, size_t nblocks);`
		e0be8b
		e0be8b	`@@ -277,9 +289,6 @@ struct gcry_cipher_handle`
		e0be8b	`unsigned char key[MAX_BLOCKSIZE];`
		e0be8b	`} u_ghash_key;`
		e0be8b
		e0be8b	`- /* GHASH implementation in use. */`
		e0be8b	`- ghash_fn_t ghash_fn;`
		e0be8b	`-`
		e0be8b	`/* Pre-calculated table for GCM. */`
		e0be8b	`#ifdef GCM_USE_TABLES`
		e0be8b	`#if (SIZEOF_UNSIGNED_LONG == 8 \|\| defined(__x86_64__))`
		e0be8b	`@@ -290,6 +299,9 @@ struct gcry_cipher_handle`
		e0be8b	`u32 gcm_table[4 * 16];`
		e0be8b	`#endif`
		e0be8b	`#endif`
		e0be8b	`+`
		e0be8b	`+ /* GHASH implementation in use. */`
		e0be8b	`+ ghash_fn_t ghash_fn;`
		e0be8b	`} gcm;`
		e0be8b
		e0be8b	`/* Mode specific storage for OCB mode. */`
		e0be8b	`diff --git a/configure.ac b/configure.ac`
		e0be8b	`index be35ce42..202ac888 100644`
		e0be8b	`--- a/configure.ac`
		e0be8b	`+++ b/configure.ac`
		e0be8b	`@@ -2752,6 +2752,25 @@ case "${host}" in`
		e0be8b	`;;`
		e0be8b	`esac`
		e0be8b
		e0be8b	`+# Arch specific GCM implementations`
		e0be8b	`+case "${host}" in`
		e0be8b	`+ powerpc64le--)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ ;;`
		e0be8b	`+ powerpc64--)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ ;;`
		e0be8b	`+ powerpc--)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ ;;`
		e0be8b	`+esac`
		e0be8b	`+`
		e0be8b	`+LIST_MEMBER(sm3, $enabled_digests)`
		e0be8b	`+if test "$found" = "1" ; then`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"`
		e0be8b	`+ AC_DEFINE(USE_SM3, 1, [Defined if this module should be included])`
		e0be8b	`+fi`
		e0be8b	`+`
		e0be8b	`LIST_MEMBER(scrypt, $enabled_kdfs)`
		e0be8b	`if test "$found" = "1" ; then`
		e0be8b	`GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"`
		e0be8b	`diff --git a/tests/basic.c b/tests/basic.c`
		e0be8b	`index 0bd80201..06808d4a 100644`
		e0be8b	`--- a/tests/basic.c`
		e0be8b	`+++ b/tests/basic.c`
		e0be8b	`@@ -1553,6 +1553,22 @@ _check_gcm_cipher (unsigned int step)`
		e0be8b	`"\x0f\xc0\xc3\xb7\x80\xf2\x44\x45\x2d\xa3\xeb\xf1\xc5\xd8\x2c\xde"`
		e0be8b	`"\xa2\x41\x89\x97\x20\x0e\xf8\x2e\x44\xae\x7e\x3f",`
		e0be8b	`"\xa4\x4a\x82\x66\xee\x1c\x8e\xb0\xc8\xb5\xd4\xcf\x5a\xe9\xf1\x9a" },`
		e0be8b	`+ { GCRY_CIPHER_AES256,`
		e0be8b	`+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c\x6d\x6a\x8f\x94\x67\x30\x83\x08"`
		e0be8b	`+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c\x6d\x6a\x8f\x94\x67\x30\x83\x08",`
		e0be8b	`+ "\xca\xfe\xba\xbe\xfa\xce\xdb\xad\xde\xca\xf8\x88", 12,`
		e0be8b	`+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef\xfe\xed\xfa\xce\xde\xad\xbe\xef"`
		e0be8b	`+ "\xab\xad\xda\xd2", 20,`
		e0be8b	`+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"`
		e0be8b	`+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"`
		e0be8b	`+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"`
		e0be8b	`+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57\xba\x63\x7b\x39",`
		e0be8b	`+ 60,`
		e0be8b	`+ "\x52\x2d\xc1\xf0\x99\x56\x7d\x07\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"`
		e0be8b	`+ "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9\x75\x98\xa2\xbd\x25\x55\xd1\xaa"`
		e0be8b	`+ "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d\xa7\xb0\x8b\x10\x56\x82\x88\x38"`
		e0be8b	`+ "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a\xbc\xc9\xf6\x62",`
		e0be8b	`+ "\x76\xfc\x6e\xce\x0f\x4e\x17\x68\xcd\xdf\x88\x53\xbb\x2d\x55\x1b" },`
		e0be8b	`/* Test vectors for overflowing CTR. */`
		e0be8b	`/* After setiv, ctr_low: 0xffffffff */`
		e0be8b	`{ GCRY_CIPHER_AES256,`
		e0be8b
		e0be8b	`diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c`
		e0be8b	`index ed27ef15..2f60c09d 100644`
		e0be8b	`--- a/cipher/cipher-gcm-ppc.c`
		e0be8b	`+++ b/cipher/cipher-gcm-ppc.c`
		e0be8b	`@@ -93,112 +93,157 @@ typedef vector signed char vector16x_s8;`
		e0be8b	`typedef vector unsigned long long vector2x_u64;`
		e0be8b	`typedef vector unsigned long long block;`
		e0be8b
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_xor(block a, block b)`
		e0be8b	`+{`
		e0be8b	`+ block r;`
		e0be8b	`+ __asm__ volatile ("xxlxor %x0, %x1, %x2"`
		e0be8b	`+ : "=wa" (r)`
		e0be8b	`+ : "wa" (a), "wa" (b));`
		e0be8b	`+ return r;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`asm_vpmsumd(block a, block b)`
		e0be8b	`{`
		e0be8b	`block r;`
		e0be8b	`- __asm__("vpmsumd %0, %1, %2"`
		e0be8b	`- : "=v" (r)`
		e0be8b	`- : "v" (a), "v" (b));`
		e0be8b	`+ __asm__ volatile ("vpmsumd %0, %1, %2"`
		e0be8b	`+ : "=v" (r)`
		e0be8b	`+ : "v" (a), "v" (b));`
		e0be8b	`return r;`
		e0be8b	`}`
		e0be8b
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`asm_swap_u64(block a)`
		e0be8b	`{`
		e0be8b	`- __asm__("xxswapd %x0, %x1"`
		e0be8b	`- : "=wa" (a)`
		e0be8b	`- : "wa" (a));`
		e0be8b	`- return a;`
		e0be8b	`+ block r;`
		e0be8b	`+ __asm__ volatile ("xxswapd %x0, %x1"`
		e0be8b	`+ : "=wa" (r)`
		e0be8b	`+ : "wa" (a));`
		e0be8b	`+ return r;`
		e0be8b	`}`
		e0be8b
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`-asm_rot_block_left(block a)`
		e0be8b	`+asm_mergelo(block l, block r)`
		e0be8b	`{`
		e0be8b	`- block zero = {0, 0};`
		e0be8b	`- block mask = {2, 0};`
		e0be8b	`- return __builtin_shuffle(a, zero, mask);`
		e0be8b	`+ block ret;`
		e0be8b	`+ __asm__ volatile ("xxmrgld %x0, %x1, %x2\n\t"`
		e0be8b	`+ : "=wa" (ret)`
		e0be8b	`+ : "wa" (l), "wa" (r));`
		e0be8b	`+ return ret;`
		e0be8b	`}`
		e0be8b
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`-asm_rot_block_right(block a)`
		e0be8b	`+asm_mergehi(block l, block r)`
		e0be8b	`{`
		e0be8b	`- block zero = {0, 0};`
		e0be8b	`- block mask = {1, 2};`
		e0be8b	`- return __builtin_shuffle(a, zero, mask);`
		e0be8b	`+ block ret;`
		e0be8b	`+ __asm__ volatile ("xxmrghd %x0, %x1, %x2\n\t"`
		e0be8b	`+ : "=wa" (ret)`
		e0be8b	`+ : "wa" (l), "wa" (r));`
		e0be8b	`+ return ret;`
		e0be8b	`}`
		e0be8b
		e0be8b	`-/* vsl is a slightly strange function in the way the shift is passed... */`
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`-asm_ashl_128(block a, vector16x_u8 shift)`
		e0be8b	`+asm_rot_block_left(block a)`
		e0be8b	`{`
		e0be8b	`block r;`
		e0be8b	`- __asm__("vsl %0, %1, %2"`
		e0be8b	`- : "=v" (r)`
		e0be8b	`- : "v" (a), "v" (shift));`
		e0be8b	`+ block zero = { 0, 0 };`
		e0be8b	`+ __asm__ volatile ("xxmrgld %x0, %x1, %x2"`
		e0be8b	`+ : "=wa" (r)`
		e0be8b	`+ : "wa" (a), "wa" (zero));`
		e0be8b	`return r;`
		e0be8b	`}`
		e0be8b
		e0be8b	`-#define ALIGNED_LOAD(in_ptr) \`
		e0be8b	`- (vec_aligned_ld (0, (const unsigned char *)(in_ptr)))`
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+asm_rot_block_right(block a)`
		e0be8b	`+{`
		e0be8b	`+ block r;`
		e0be8b	`+ block zero = { 0, 0 };`
		e0be8b	`+ __asm__ volatile ("xxsldwi %x0, %x2, %x1, 2"`
		e0be8b	`+ : "=wa" (r)`
		e0be8b	`+ : "wa" (a), "wa" (zero));`
		e0be8b	`+ return r;`
		e0be8b	`+}`
		e0be8b
		e0be8b	`+/* vsl is a slightly strange function in the way the shift is passed... */`
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`-vec_aligned_ld(unsigned long offset, const unsigned char *ptr)`
		e0be8b	`+asm_ashl_128(block a, vector16x_u8 shift)`
		e0be8b	`{`
		e0be8b	`-#ifndef WORDS_BIGENDIAN`
		e0be8b	`- block vec;`
		e0be8b	`- __asm__ ("lvx %0,%1,%2\n\t"`
		e0be8b	`- : "=v" (vec)`
		e0be8b	`- : "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`- : "memory", "r0");`
		e0be8b	`- return vec;`
		e0be8b	`-#else`
		e0be8b	`- return vec_vsx_ld (offset, ptr);`
		e0be8b	`-#endif`
		e0be8b	`+ block r;`
		e0be8b	`+ __asm__ volatile ("vsl %0, %1, %2"`
		e0be8b	`+ : "=v" (r)`
		e0be8b	`+ : "v" (a), "v" (shift));`
		e0be8b	`+ return r;`
		e0be8b	`}`
		e0be8b
		e0be8b	`#define STORE_TABLE(gcm_table, slot, vec) \`
		e0be8b	`- vec_aligned_st (((block)vec), slot * 16, (unsigned char *)(gcm_table));`
		e0be8b	`-`
		e0be8b	`+ vec_store_he (((block)vec), slot * 16, (unsigned char *)(gcm_table));`
		e0be8b
		e0be8b	`static ASM_FUNC_ATTR_INLINE void`
		e0be8b	`-vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr)`
		e0be8b	`+vec_store_he(block vec, unsigned long offset, unsigned char *ptr)`
		e0be8b	`{`
		e0be8b	`#ifndef WORDS_BIGENDIAN`
		e0be8b	`- __asm__ ("stvx %0,%1,%2\n\t"`
		e0be8b	`- :`
		e0be8b	`- : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`- : "memory", "r0");`
		e0be8b	`+ /* GCC vec_vsx_ld is generating two instructions on little-endian. Use`
		e0be8b	`+ * lxvd2x directly instead. */`
		e0be8b	`+#if __GNUC__ >= 4`
		e0be8b	`+ if (__builtin_constant_p (offset) && offset == 0)`
		e0be8b	`+ __asm__ volatile ("stxvd2x %x0, 0, %1\n\t"`
		e0be8b	`+ :`
		e0be8b	`+ : "wa" (vec), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`+ else`
		e0be8b	`+#endif`
		e0be8b	`+ __asm__ volatile ("stxvd2x %x0, %1, %2\n\t"`
		e0be8b	`+ :`
		e0be8b	`+ : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`#else`
		e0be8b	`vec_vsx_st ((vector16x_u8)vec, offset, ptr);`
		e0be8b	`#endif`
		e0be8b	`}`
		e0be8b
		e0be8b	`#define VEC_LOAD_BE(in_ptr, bswap_const) \`
		e0be8b	`- (vec_load_be (0, (const unsigned char *)(in_ptr), bswap_const))`
		e0be8b	`+ vec_be_swap(vec_load_he (0, (const unsigned char *)(in_ptr)), bswap_const)`
		e0be8b
		e0be8b	`static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`-vec_load_be(unsigned long offset, const unsigned char *ptr,`
		e0be8b	`- vector unsigned char be_bswap_const)`
		e0be8b	`+vec_load_he(unsigned long offset, const unsigned char *ptr)`
		e0be8b	`{`
		e0be8b	`#ifndef WORDS_BIGENDIAN`
		e0be8b	`block vec;`
		e0be8b	`/* GCC vec_vsx_ld is generating two instructions on little-endian. Use`
		e0be8b	`- * lxvw4x directly instead. */`
		e0be8b	`- __asm__ ("lxvw4x %x0,%1,%2\n\t"`
		e0be8b	`- : "=wa" (vec)`
		e0be8b	`- : "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`- : "memory", "r0");`
		e0be8b	`- __asm__ ("vperm %0,%1,%1,%2\n\t"`
		e0be8b	`- : "=v" (vec)`
		e0be8b	`- : "v" (vec), "v" (be_bswap_const));`
		e0be8b	`+ * lxvd2x directly instead. */`
		e0be8b	`+#if __GNUC__ >= 4`
		e0be8b	`+ if (__builtin_constant_p (offset) && offset == 0)`
		e0be8b	`+ __asm__ volatile ("lxvd2x %x0, 0, %1\n\t"`
		e0be8b	`+ : "=wa" (vec)`
		e0be8b	`+ : "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`+ else`
		e0be8b	`+#endif`
		e0be8b	`+ __asm__ volatile ("lxvd2x %x0, %1, %2\n\t"`
		e0be8b	`+ : "=wa" (vec)`
		e0be8b	`+ : "r" (offset), "r" ((uintptr_t)ptr)`
		e0be8b	`+ : "memory", "r0");`
		e0be8b	`return vec;`
		e0be8b	`#else`
		e0be8b	`- (void)be_bswap_const;`
		e0be8b	`return vec_vsx_ld (offset, ptr);`
		e0be8b	`#endif`
		e0be8b	`}`
		e0be8b
		e0be8b	`+static ASM_FUNC_ATTR_INLINE block`
		e0be8b	`+vec_be_swap(block vec, vector16x_u8 be_bswap_const)`
		e0be8b	`+{`
		e0be8b	`+#ifndef WORDS_BIGENDIAN`
		e0be8b	`+ __asm__ volatile ("vperm %0, %1, %1, %2\n\t"`
		e0be8b	`+ : "=v" (vec)`
		e0be8b	`+ : "v" (vec), "v" (be_bswap_const));`
		e0be8b	`+#else`
		e0be8b	`+ (void)be_bswap_const;`
		e0be8b	`+#endif`
		e0be8b	`+ return vec;`
		e0be8b	`+}`
		e0be8b	`+`
		e0be8b	`+`
		e0be8b	`/* Power ghash based on papers:`
		e0be8b	`"The Galois/Counter Mode of Operation (GCM)"; David A. McGrew, John Viega`
		e0be8b	`"Intel® Carry-Less Multiplication Instruction and its Usage for Computing`
		e0be8b	`@@ -216,15 +261,16 @@ vec_load_be(unsigned long offset, const unsigned char *ptr,`
		e0be8b	`void ASM_FUNC_ATTR`
		e0be8b	`_gcry_ghash_setup_ppc_vpmsum (uint64_t *gcm_table, void *gcm_key)`
		e0be8b	`{`
		e0be8b	`- vector16x_u8 bswap_const =`
		e0be8b	`- { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };`
		e0be8b	`- vector16x_u8 c2 =`
		e0be8b	`+ static const vector16x_u8 bswap_const =`
		e0be8b	`+ { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };`
		e0be8b	`+ static const vector16x_u8 c2 =`
		e0be8b	`{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0b11000010 };`
		e0be8b	`+ static const vector16x_u8 one =`
		e0be8b	`+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };`
		e0be8b	`block T0, T1, T2;`
		e0be8b	`block C2, H, H1, H1l, H1h, H2, H2l, H2h;`
		e0be8b	`block H3l, H3, H3h, H4l, H4, H4h, T3, T4;`
		e0be8b	`vector16x_s8 most_sig_of_H, t7, carry;`
		e0be8b	`- vector16x_u8 one = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };`
		e0be8b
		e0be8b	`H = VEC_LOAD_BE(gcm_key, bswap_const);`
		e0be8b	`most_sig_of_H = vec_splat((vector16x_s8)H, 15);`
		e0be8b	`@@ -255,7 +301,7 @@ _gcry_ghash_setup_ppc_vpmsum (uint64_t *gcm_table, void *gcm_key)`
		e0be8b	`/* reduce 1 */`
		e0be8b	`T0 = asm_vpmsumd (H2l, C2);`
		e0be8b
		e0be8b	`- H2l ^= asm_rot_block_left (H2);;`
		e0be8b	`+ H2l ^= asm_rot_block_left (H2);`
		e0be8b	`H2h ^= asm_rot_block_right (H2);`
		e0be8b	`H2l = asm_swap_u64 (H2l);`
		e0be8b	`H2l ^= T0;`
		e0be8b	`@@ -321,45 +367,30 @@ _gcry_ghash_setup_ppc_vpmsum (uint64_t gcm_table, void gcm_key)`
		e0be8b	`STORE_TABLE (gcm_table, 12, H4h);`
		e0be8b	`}`
		e0be8b
		e0be8b	`-ASM_FUNC_ATTR_INLINE`
		e0be8b	`-block`
		e0be8b	`-vec_perm2(block l, block r, vector16x_u8 perm) {`
		e0be8b	`- block ret;`
		e0be8b	`- __asm__ ("vperm %0,%1,%2,%3\n\t"`
		e0be8b	`- : "=v" (ret)`
		e0be8b	`- : "v" (l), "v" (r), "v" (perm));`
		e0be8b	`- return ret;`
		e0be8b	`-}`
		e0be8b	`-`
		e0be8b	`void ASM_FUNC_ATTR`
		e0be8b	`-_gcry_ghash_ppc_vpmsum (const byte *result, const void *const gcm_table,`
		e0be8b	`- const byte *const buf, const size_t nblocks)`
		e0be8b	`+_gcry_ghash_ppc_vpmsum (byte *result, const void *const gcm_table,`
		e0be8b	`+ const byte *buf, const size_t nblocks)`
		e0be8b	`{`
		e0be8b	`- /* This const is strange, it is reversing the bytes, and also reversing`
		e0be8b	`- the u32s that get switched by lxvw4 and it also addresses bytes big-endian,`
		e0be8b	`- and is here due to lack of proper peep-hole optimization. */`
		e0be8b	`- vector16x_u8 bswap_const =`
		e0be8b	`- { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };`
		e0be8b	`- vector16x_u8 bswap_8_const =`
		e0be8b	`- { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };`
		e0be8b	`+ static const vector16x_u8 bswap_const =`
		e0be8b	`+ { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };`
		e0be8b	`block c2, H0l, H0m, H0h, H4l, H4m, H4h, H2m, H3l, H3m, H3h, Hl;`
		e0be8b	`block Hm, Hh, in, in0, in1, in2, in3, Hm_right, Hl_rotate, cur;`
		e0be8b	`- size_t blocks_remaining = nblocks, off = 0;`
		e0be8b	`+ size_t blocks_remaining = nblocks;`
		e0be8b	`size_t not_multiple_of_four;`
		e0be8b	`block t0;`
		e0be8b
		e0be8b	`- cur = vec_load_be (0, result, bswap_const);`
		e0be8b	`+ cur = vec_be_swap (vec_load_he (0, result), bswap_const);`
		e0be8b
		e0be8b	`- c2 = vec_aligned_ld (0, gcm_table);`
		e0be8b	`- H0l = vec_aligned_ld (16, gcm_table);`
		e0be8b	`- H0m = vec_aligned_ld (32, gcm_table);`
		e0be8b	`- H0h = vec_aligned_ld (48, gcm_table);`
		e0be8b	`+ c2 = vec_load_he (0, gcm_table);`
		e0be8b	`+ H0l = vec_load_he (16, gcm_table);`
		e0be8b	`+ H0m = vec_load_he (32, gcm_table);`
		e0be8b	`+ H0h = vec_load_he (48, gcm_table);`
		e0be8b
		e0be8b	`for (not_multiple_of_four = nblocks % 4; not_multiple_of_four;`
		e0be8b	`not_multiple_of_four--)`
		e0be8b	`{`
		e0be8b	`- in = vec_load_be (off, buf, bswap_const);`
		e0be8b	`- off += 16;`
		e0be8b	`+ in = vec_be_swap (vec_load_he (0, buf), bswap_const);`
		e0be8b	`+ buf += 16;`
		e0be8b	`blocks_remaining--;`
		e0be8b	`cur ^= in;`
		e0be8b
		e0be8b	`@@ -385,62 +416,64 @@ _gcry_ghash_ppc_vpmsum (const byte *result, const void *const gcm_table,`
		e0be8b
		e0be8b	`if (blocks_remaining > 0)`
		e0be8b	`{`
		e0be8b	`- vector16x_u8 hiperm =`
		e0be8b	`- {`
		e0be8b	`- 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,`
		e0be8b	`- 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0`
		e0be8b	`- };`
		e0be8b	`- vector16x_u8 loperm =`
		e0be8b	`- {`
		e0be8b	`- 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,`
		e0be8b	`- 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8`
		e0be8b	`- };`
		e0be8b	`block Xl, Xm, Xh, Xl1, Xm1, Xh1, Xm2, Xl3, Xm3, Xh3, Xl_rotate;`
		e0be8b	`block H21l, H21h, merge_l, merge_h;`
		e0be8b	`-`
		e0be8b	`- H2m = vec_aligned_ld (48 + 32, gcm_table);`
		e0be8b	`- H3l = vec_aligned_ld (48 * 2 + 16, gcm_table);`
		e0be8b	`- H3m = vec_aligned_ld (48 * 2 + 32, gcm_table);`
		e0be8b	`- H3h = vec_aligned_ld (48 * 2 + 48, gcm_table);`
		e0be8b	`- H4l = vec_aligned_ld (48 * 3 + 16, gcm_table);`
		e0be8b	`- H4m = vec_aligned_ld (48 * 3 + 32, gcm_table);`
		e0be8b	`- H4h = vec_aligned_ld (48 * 3 + 48, gcm_table);`
		e0be8b	`-`
		e0be8b	`- in0 = vec_load_be (off, buf, bswap_const);`
		e0be8b	`- in1 = vec_load_be (off + 16, buf, bswap_const);`
		e0be8b	`- in2 = vec_load_be (off + 32, buf, bswap_const);`
		e0be8b	`- in3 = vec_load_be (off + 48, buf, bswap_const);`
		e0be8b	`- blocks_remaining -= 4;`
		e0be8b	`- off += 64;`
		e0be8b	`-`
		e0be8b	`- Xh = in0 ^ cur;`
		e0be8b	`+ block t1, t2;`
		e0be8b	`+`
		e0be8b	`+ H2m = vec_load_he (48 + 32, gcm_table);`
		e0be8b	`+ H3l = vec_load_he (48 * 2 + 16, gcm_table);`
		e0be8b	`+ H3m = vec_load_he (48 * 2 + 32, gcm_table);`
		e0be8b	`+ H3h = vec_load_he (48 * 2 + 48, gcm_table);`
		e0be8b	`+ H4l = vec_load_he (48 * 3 + 16, gcm_table);`
		e0be8b	`+ H4m = vec_load_he (48 * 3 + 32, gcm_table);`
		e0be8b	`+ H4h = vec_load_he (48 * 3 + 48, gcm_table);`
		e0be8b	`+`
		e0be8b	`+ in0 = vec_load_he (0, buf);`
		e0be8b	`+ in1 = vec_load_he (16, buf);`
		e0be8b	`+ in2 = vec_load_he (32, buf);`
		e0be8b	`+ in3 = vec_load_he (48, buf);`
		e0be8b	`+ in0 = vec_be_swap(in0, bswap_const);`
		e0be8b	`+ in1 = vec_be_swap(in1, bswap_const);`
		e0be8b	`+ in2 = vec_be_swap(in2, bswap_const);`
		e0be8b	`+ in3 = vec_be_swap(in3, bswap_const);`
		e0be8b	`+`
		e0be8b	`+ Xh = asm_xor (in0, cur);`
		e0be8b
		e0be8b	`Xl1 = asm_vpmsumd (in1, H3l);`
		e0be8b	`Xm1 = asm_vpmsumd (in1, H3m);`
		e0be8b	`Xh1 = asm_vpmsumd (in1, H3h);`
		e0be8b
		e0be8b	`- H21l = vec_perm2 (H2m, H0m, hiperm);`
		e0be8b	`- H21h = vec_perm2 (H2m, H0m, loperm);`
		e0be8b	`- merge_l = vec_perm2 (in2, in3, loperm);`
		e0be8b	`- merge_h = vec_perm2 (in2, in3, hiperm);`
		e0be8b	`+ H21l = asm_mergehi (H2m, H0m);`
		e0be8b	`+ H21h = asm_mergelo (H2m, H0m);`
		e0be8b	`+ merge_l = asm_mergelo (in2, in3);`
		e0be8b	`+ merge_h = asm_mergehi (in2, in3);`
		e0be8b
		e0be8b	`Xm2 = asm_vpmsumd (in2, H2m);`
		e0be8b	`Xl3 = asm_vpmsumd (merge_l, H21l);`
		e0be8b	`Xm3 = asm_vpmsumd (in3, H0m);`
		e0be8b	`Xh3 = asm_vpmsumd (merge_h, H21h);`
		e0be8b
		e0be8b	`- Xm2 ^= Xm1;`
		e0be8b	`- Xl3 ^= Xl1;`
		e0be8b	`- Xm3 ^= Xm2;`
		e0be8b	`- Xh3 ^= Xh1;`
		e0be8b	`+ Xm2 = asm_xor (Xm2, Xm1);`
		e0be8b	`+ Xl3 = asm_xor (Xl3, Xl1);`
		e0be8b	`+ Xm3 = asm_xor (Xm3, Xm2);`
		e0be8b	`+ Xh3 = asm_xor (Xh3, Xh1);`
		e0be8b
		e0be8b	`/* Gerald Estrin's scheme for parallel multiplication of polynomials */`
		e0be8b	`- for (;blocks_remaining > 0; blocks_remaining -= 4, off += 64)`
		e0be8b	`+ while (1)`
		e0be8b	`{`
		e0be8b	`- in0 = vec_load_be (off, buf, bswap_const);`
		e0be8b	`- in1 = vec_load_be (off + 16, buf, bswap_const);`
		e0be8b	`- in2 = vec_load_be (off + 32, buf, bswap_const);`
		e0be8b	`- in3 = vec_load_be (off + 48, buf, bswap_const);`
		e0be8b	`+ buf += 64;`
		e0be8b	`+ blocks_remaining -= 4;`
		e0be8b	`+ if (!blocks_remaining)`
		e0be8b	`+ break;`
		e0be8b	`+`
		e0be8b	`+ in0 = vec_load_he (0, buf);`
		e0be8b	`+ in1 = vec_load_he (16, buf);`
		e0be8b	`+ in2 = vec_load_he (32, buf);`
		e0be8b	`+ in3 = vec_load_he (48, buf);`
		e0be8b	`+ in1 = vec_be_swap(in1, bswap_const);`
		e0be8b	`+ in2 = vec_be_swap(in2, bswap_const);`
		e0be8b	`+ in3 = vec_be_swap(in3, bswap_const);`
		e0be8b	`+ in0 = vec_be_swap(in0, bswap_const);`
		e0be8b
		e0be8b	`Xl = asm_vpmsumd (Xh, H4l);`
		e0be8b	`Xm = asm_vpmsumd (Xh, H4m);`
		e0be8b	`@@ -449,62 +482,63 @@ _gcry_ghash_ppc_vpmsum (const byte *result, const void *const gcm_table,`
		e0be8b	`Xm1 = asm_vpmsumd (in1, H3m);`
		e0be8b	`Xh1 = asm_vpmsumd (in1, H3h);`
		e0be8b
		e0be8b	`- Xl ^= Xl3;`
		e0be8b	`- Xm ^= Xm3;`
		e0be8b	`- Xh ^= Xh3;`
		e0be8b	`- merge_l = vec_perm2 (in2, in3, loperm);`
		e0be8b	`- merge_h = vec_perm2 (in2, in3, hiperm);`
		e0be8b	`+ Xl = asm_xor (Xl, Xl3);`
		e0be8b	`+ Xm = asm_xor (Xm, Xm3);`
		e0be8b	`+ Xh = asm_xor (Xh, Xh3);`
		e0be8b	`+ merge_l = asm_mergelo (in2, in3);`
		e0be8b	`+ merge_h = asm_mergehi (in2, in3);`
		e0be8b
		e0be8b	`t0 = asm_vpmsumd (Xl, c2);`
		e0be8b	`Xl3 = asm_vpmsumd (merge_l, H21l);`
		e0be8b	`Xh3 = asm_vpmsumd (merge_h, H21h);`
		e0be8b
		e0be8b	`- Xl ^= asm_rot_block_left (Xm);`
		e0be8b	`- Xh ^= asm_rot_block_right (Xm);`
		e0be8b	`+ t1 = asm_rot_block_left (Xm);`
		e0be8b	`+ t2 = asm_rot_block_right (Xm);`
		e0be8b	`+ Xl = asm_xor(Xl, t1);`
		e0be8b	`+ Xh = asm_xor(Xh, t2);`
		e0be8b
		e0be8b	`Xl = asm_swap_u64 (Xl);`
		e0be8b	`- Xl ^= t0;`
		e0be8b	`+ Xl = asm_xor(Xl, t0);`
		e0be8b
		e0be8b	`Xl_rotate = asm_swap_u64 (Xl);`
		e0be8b	`Xm2 = asm_vpmsumd (in2, H2m);`
		e0be8b	`Xm3 = asm_vpmsumd (in3, H0m);`
		e0be8b	`Xl = asm_vpmsumd (Xl, c2);`
		e0be8b
		e0be8b	`- Xl3 ^= Xl1;`
		e0be8b	`- Xh3 ^= Xh1;`
		e0be8b	`- Xh ^= in0;`
		e0be8b	`- Xm2 ^= Xm1;`
		e0be8b	`- Xh ^= Xl_rotate;`
		e0be8b	`- Xm3 ^= Xm2;`
		e0be8b	`- Xh ^= Xl;`
		e0be8b	`+ Xl3 = asm_xor (Xl3, Xl1);`
		e0be8b	`+ Xh3 = asm_xor (Xh3, Xh1);`
		e0be8b	`+ Xh = asm_xor (Xh, in0);`
		e0be8b	`+ Xm2 = asm_xor (Xm2, Xm1);`
		e0be8b	`+ Xh = asm_xor (Xh, Xl_rotate);`
		e0be8b	`+ Xm3 = asm_xor (Xm3, Xm2);`
		e0be8b	`+ Xh = asm_xor (Xh, Xl);`
		e0be8b	`}`
		e0be8b
		e0be8b	`Xl = asm_vpmsumd (Xh, H4l);`
		e0be8b	`Xm = asm_vpmsumd (Xh, H4m);`
		e0be8b	`Xh = asm_vpmsumd (Xh, H4h);`
		e0be8b
		e0be8b	`- Xl ^= Xl3;`
		e0be8b	`- Xm ^= Xm3;`
		e0be8b	`+ Xl = asm_xor (Xl, Xl3);`
		e0be8b	`+ Xm = asm_xor (Xm, Xm3);`
		e0be8b
		e0be8b	`t0 = asm_vpmsumd (Xl, c2);`
		e0be8b
		e0be8b	`- Xh ^= Xh3;`
		e0be8b	`- Xl ^= asm_rot_block_left (Xm);`
		e0be8b	`- Xh ^= asm_rot_block_right (Xm);`
		e0be8b	`+ Xh = asm_xor (Xh, Xh3);`
		e0be8b	`+ t1 = asm_rot_block_left (Xm);`
		e0be8b	`+ t2 = asm_rot_block_right (Xm);`
		e0be8b	`+ Xl = asm_xor (Xl, t1);`
		e0be8b	`+ Xh = asm_xor (Xh, t2);`
		e0be8b
		e0be8b	`Xl = asm_swap_u64 (Xl);`
		e0be8b	`- Xl ^= t0;`
		e0be8b	`+ Xl = asm_xor (Xl, t0);`
		e0be8b
		e0be8b	`Xl_rotate = asm_swap_u64 (Xl);`
		e0be8b	`Xl = asm_vpmsumd (Xl, c2);`
		e0be8b	`- Xl_rotate ^= Xh;`
		e0be8b	`- Xl ^= Xl_rotate;`
		e0be8b	`-`
		e0be8b	`- cur = Xl;`
		e0be8b	`+ Xh = asm_xor (Xh, Xl_rotate);`
		e0be8b	`+ cur = asm_xor (Xh, Xl);`
		e0be8b	`}`
		e0be8b
		e0be8b	`- cur = (block)vec_perm ((vector16x_u8)cur, (vector16x_u8)cur, bswap_8_const);`
		e0be8b	`- STORE_TABLE (result, 0, cur);`
		e0be8b	`+ vec_store_he (vec_be_swap (cur, bswap_const), 0, result);`
		e0be8b	`}`
		e0be8b
		e0be8b	`#endif /* GCM_USE_PPC_VPMSUM */`
		e0be8b
		e0be8b	`diff --git a/cipher/Makefile.am b/cipher/Makefile.am`
		e0be8b	`index ab5d2a38..7a777ef2 100644`
		e0be8b	`--- a/cipher/Makefile.am`
		e0be8b	`+++ b/cipher/Makefile.am`
		e0be8b	`@@ -42,8 +42,7 @@ libcipher_la_LIBADD = $(GCRYPT_MODULES)`
		e0be8b	`libcipher_la_SOURCES = \`
		e0be8b	`cipher.c cipher-internal.h \`
		e0be8b	`cipher-cbc.c cipher-cfb.c cipher-ofb.c cipher-ctr.c cipher-aeswrap.c \`
		e0be8b	`-cipher-ccm.c cipher-cmac.c cipher-gcm.c cipher-gcm-intel-pclmul.c \`
		e0be8b	`- cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \`
		e0be8b	`+cipher-ccm.c cipher-cmac.c cipher-gcm.c \`
		e0be8b	`cipher-poly1305.c cipher-ocb.c cipher-xts.c \`
		e0be8b	`cipher-selftest.c cipher-selftest.h \`
		e0be8b	`pubkey.c pubkey-internal.h pubkey-util.c \`
		e0be8b	`@@ -66,7 +65,8 @@ blowfish.c blowfish-amd64.S blowfish-arm.S \`
		e0be8b	`cast5.c cast5-amd64.S cast5-arm.S \`
		e0be8b	`chacha20.c chacha20-sse2-amd64.S chacha20-ssse3-amd64.S chacha20-avx2-amd64.S \`
		e0be8b	`chacha20-armv7-neon.S \`
		e0be8b	`-cipher-gcm-ppc.c \`
		e0be8b	`+cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c \`
		e0be8b	`+ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \`
		e0be8b	`crc.c \`
		e0be8b	`crc-intel-pclmul.c crc-ppc.c \`
		e0be8b	`des.c des-amd64.S \`
		e0be8b	`diff --git a/configure.ac b/configure.ac`
		e0be8b	`index fd447906..9bcb1318 100644`
		e0be8b	`--- a/configure.ac`
		e0be8b	`+++ b/configure.ac`
		e0be8b	`@@ -2754,14 +2754,18 @@ esac`
		e0be8b
		e0be8b	`# Arch specific GCM implementations`
		e0be8b	`case "${host}" in`
		e0be8b	`- powerpc64le-*-*)`
		e0be8b	`- GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ i?86-*-* | x86_64-*-*)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-intel-pclmul.lo"`
		e0be8b	`;;`
		e0be8b	`- powerpc64-*-*)`
		e0be8b	`- GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ arm*-*-*)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-armv7-neon.lo"`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-armv8-aarch32-ce.lo"`
		e0be8b	`+ ;;`
		e0be8b	`+ aarch64-*-*)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-armv8-aarch64-ce.lo"`
		e0be8b	`;;`
		e0be8b	`- powerpc-*-*)`
		e0be8b	`- GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`+ powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)`
		e0be8b	`+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS cipher-gcm-ppc.lo"`
		e0be8b	`;;`
		e0be8b	`esac`
		e0be8b

rpms / libgcrypt

Source Code

Blame SOURCES/libgcrypt-1.8.5-ppc-aes-gcm.patch