Blob Blame Raw
diff -up libgcrypt-1.8.5/cipher/arcfour.c.aes-perf libgcrypt-1.8.5/cipher/arcfour.c
--- libgcrypt-1.8.5/cipher/arcfour.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/arcfour.c	2020-04-22 18:29:41.662862382 +0200
@@ -184,10 +184,12 @@ do_arcfour_setkey (void *context, const
 }
 
 static gcry_err_code_t
-arcfour_setkey ( void *context, const byte *key, unsigned int keylen )
+arcfour_setkey ( void *context, const byte *key, unsigned int keylen,
+                 gcry_cipher_hd_t hd )
 {
   ARCFOUR_context *ctx = (ARCFOUR_context *) context;
   gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen );
+  (void)hd;
   return rc;
 }
 
@@ -207,11 +209,11 @@ selftest(void)
   static const byte ciphertext_1[] =
     { 0xF1, 0x38, 0x29, 0xC9, 0xDE };
 
-  arcfour_setkey( &ctx, key_1, sizeof(key_1));
+  arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
   encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1));
   if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1)))
     return "Arcfour encryption test 1 failed.";
-  arcfour_setkey( &ctx, key_1, sizeof(key_1));
+  arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
   encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */
   if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1)))
     return "Arcfour decryption test 1 failed.";
diff -up libgcrypt-1.8.5/cipher/blowfish.c.aes-perf libgcrypt-1.8.5/cipher/blowfish.c
--- libgcrypt-1.8.5/cipher/blowfish.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/blowfish.c	2020-04-22 18:29:41.663862363 +0200
@@ -37,6 +37,7 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 #define BLOWFISH_BLOCKSIZE 8
@@ -67,7 +68,8 @@ typedef struct {
     u32 p[BLOWFISH_ROUNDS+2];
 } BLOWFISH_context;
 
-static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen);
+static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen,
+                                  gcry_cipher_hd_t hd);
 static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf);
 static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf);
 
@@ -703,7 +705,7 @@ _gcry_blowfish_ctr_enc(void *context, un
       /* Encrypt the counter. */
       do_encrypt_block(ctx, tmpbuf, ctr);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
       outbuf += BLOWFISH_BLOCKSIZE;
       inbuf  += BLOWFISH_BLOCKSIZE;
       /* Increment the counter.  */
@@ -771,7 +773,7 @@ _gcry_blowfish_cbc_dec(void *context, un
          the intermediate result to SAVEBUF.  */
       do_decrypt_block (ctx, savebuf, inbuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
       inbuf += BLOWFISH_BLOCKSIZE;
       outbuf += BLOWFISH_BLOCKSIZE;
     }
@@ -828,7 +830,7 @@ _gcry_blowfish_cfb_dec(void *context, un
   for ( ;nblocks; nblocks-- )
     {
       do_encrypt_block(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
       outbuf += BLOWFISH_BLOCKSIZE;
       inbuf  += BLOWFISH_BLOCKSIZE;
     }
@@ -897,7 +899,7 @@ selftest(void)
   const char *r;
 
   bf_setkey( (void *) &c,
-             (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26 );
+             (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26, NULL );
   encrypt_block( (void *) &c, buffer, plain );
   if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) )
     return "Blowfish selftest failed (1).";
@@ -905,7 +907,7 @@ selftest(void)
   if( memcmp( buffer, plain, 8 ) )
     return "Blowfish selftest failed (2).";
 
-  bf_setkey( (void *) &c, key3, 8 );
+  bf_setkey( (void *) &c, key3, 8, NULL );
   encrypt_block( (void *) &c, buffer, plain3 );
   if( memcmp( buffer, cipher3, 8 ) )
     return "Blowfish selftest failed (3).";
@@ -1095,10 +1097,12 @@ do_bf_setkey (BLOWFISH_context *c, const
 
 
 static gcry_err_code_t
-bf_setkey (void *context, const byte *key, unsigned keylen)
+bf_setkey (void *context, const byte *key, unsigned keylen,
+           gcry_cipher_hd_t hd)
 {
   BLOWFISH_context *c = (BLOWFISH_context *) context;
   gcry_err_code_t rc = do_bf_setkey (c, key, keylen);
+  (void)hd;
   return rc;
 }
 
diff -up libgcrypt-1.8.5/cipher/bufhelp.h.aes-perf libgcrypt-1.8.5/cipher/bufhelp.h
--- libgcrypt-1.8.5/cipher/bufhelp.h.aes-perf	2018-04-17 17:35:28.000000000 +0200
+++ libgcrypt-1.8.5/cipher/bufhelp.h	2020-04-22 18:29:41.663862363 +0200
@@ -450,7 +450,21 @@ static inline void buf_put_le64(void *_b
   out->a = le_bswap64(val);
 }
 
-
 #endif /*BUFHELP_UNALIGNED_ACCESS*/
 
+
+/* Host-endian get/put macros */
+#ifdef WORDS_BIGENDIAN
+# define buf_get_he32 buf_get_be32
+# define buf_put_he32 buf_put_be32
+# define buf_get_he64 buf_get_be64
+# define buf_put_he64 buf_put_be64
+#else
+# define buf_get_he32 buf_get_le32
+# define buf_put_he32 buf_put_le32
+# define buf_get_he64 buf_get_le64
+# define buf_put_he64 buf_put_le64
+#endif
+
+
 #endif /*GCRYPT_BUFHELP_H*/
diff -up libgcrypt-1.8.5/cipher/camellia-glue.c.aes-perf libgcrypt-1.8.5/cipher/camellia-glue.c
--- libgcrypt-1.8.5/cipher/camellia-glue.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/camellia-glue.c	2020-04-22 18:29:41.664862344 +0200
@@ -204,7 +204,8 @@ extern void _gcry_camellia_aesni_avx2_oc
 static const char *selftest(void);
 
 static gcry_err_code_t
-camellia_setkey(void *c, const byte *key, unsigned keylen)
+camellia_setkey(void *c, const byte *key, unsigned keylen,
+                gcry_cipher_hd_t hd)
 {
   CAMELLIA_context *ctx=c;
   static int initialized=0;
@@ -213,6 +214,8 @@ camellia_setkey(void *c, const byte *key
   unsigned int hwf = _gcry_get_hw_features ();
 #endif
 
+  (void)hd;
+
   if(keylen!=16 && keylen!=24 && keylen!=32)
     return GPG_ERR_INV_KEYLEN;
 
@@ -427,7 +430,7 @@ _gcry_camellia_ctr_enc(void *context, un
       /* Encrypt the counter. */
       Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf  += CAMELLIA_BLOCK_SIZE;
       /* Increment the counter.  */
@@ -520,7 +523,8 @@ _gcry_camellia_cbc_dec(void *context, un
          the intermediate result to SAVEBUF.  */
       Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                CAMELLIA_BLOCK_SIZE);
       inbuf += CAMELLIA_BLOCK_SIZE;
       outbuf += CAMELLIA_BLOCK_SIZE;
     }
@@ -602,7 +606,7 @@ _gcry_camellia_cfb_dec(void *context, un
   for ( ;nblocks; nblocks-- )
     {
       Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf  += CAMELLIA_BLOCK_SIZE;
     }
@@ -991,7 +995,7 @@ selftest(void)
       0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
     };
 
-  camellia_setkey(&ctx,key_128,sizeof(key_128));
+  camellia_setkey(&ctx,key_128,sizeof(key_128),NULL);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
     return "CAMELLIA-128 test encryption failed.";
@@ -999,7 +1003,7 @@ selftest(void)
   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
     return "CAMELLIA-128 test decryption failed.";
 
-  camellia_setkey(&ctx,key_192,sizeof(key_192));
+  camellia_setkey(&ctx,key_192,sizeof(key_192),NULL);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
     return "CAMELLIA-192 test encryption failed.";
@@ -1007,7 +1011,7 @@ selftest(void)
   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
     return "CAMELLIA-192 test decryption failed.";
 
-  camellia_setkey(&ctx,key_256,sizeof(key_256));
+  camellia_setkey(&ctx,key_256,sizeof(key_256),NULL);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
     return "CAMELLIA-256 test encryption failed.";
diff -up libgcrypt-1.8.5/cipher/cast5.c.aes-perf libgcrypt-1.8.5/cipher/cast5.c
--- libgcrypt-1.8.5/cipher/cast5.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cast5.c	2020-04-22 18:29:41.665862325 +0200
@@ -44,6 +44,7 @@
 #include "cipher.h"
 #include "bithelp.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
@@ -72,7 +73,8 @@ typedef struct {
 #endif
 } CAST5_context;
 
-static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen);
+static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen,
+                                    gcry_cipher_hd_t hd);
 static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf);
 static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf);
 
@@ -671,7 +673,7 @@ _gcry_cast5_ctr_enc(void *context, unsig
       /* Encrypt the counter. */
       do_encrypt_block(ctx, tmpbuf, ctr);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
       outbuf += CAST5_BLOCKSIZE;
       inbuf  += CAST5_BLOCKSIZE;
       /* Increment the counter.  */
@@ -739,7 +741,7 @@ _gcry_cast5_cbc_dec(void *context, unsig
          the intermediate result to SAVEBUF.  */
       do_decrypt_block (ctx, savebuf, inbuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
       inbuf += CAST5_BLOCKSIZE;
       outbuf += CAST5_BLOCKSIZE;
     }
@@ -795,7 +797,7 @@ _gcry_cast5_cfb_dec(void *context, unsig
   for ( ;nblocks; nblocks-- )
     {
       do_encrypt_block(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
       outbuf += CAST5_BLOCKSIZE;
       inbuf  += CAST5_BLOCKSIZE;
     }
@@ -863,7 +865,7 @@ selftest(void)
     byte buffer[8];
     const char *r;
 
-    cast_setkey( &c, key, 16 );
+    cast_setkey( &c, key, 16, NULL );
     encrypt_block( &c, buffer, plain );
     if( memcmp( buffer, cipher, 8 ) )
 	return "1";
@@ -884,10 +886,10 @@ selftest(void)
 			0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E };
 
 	for(i=0; i < 1000000; i++ ) {
-	    cast_setkey( &c, b0, 16 );
+	    cast_setkey( &c, b0, 16, NULL );
 	    encrypt_block( &c, a0, a0 );
 	    encrypt_block( &c, a0+8, a0+8 );
-	    cast_setkey( &c, a0, 16 );
+	    cast_setkey( &c, a0, 16, NULL );
 	    encrypt_block( &c, b0, b0 );
 	    encrypt_block( &c, b0+8, b0+8 );
 	}
@@ -1029,10 +1031,12 @@ do_cast_setkey( CAST5_context *c, const
 }
 
 static gcry_err_code_t
-cast_setkey (void *context, const byte *key, unsigned keylen )
+cast_setkey (void *context, const byte *key, unsigned keylen,
+             gcry_cipher_hd_t hd )
 {
   CAST5_context *c = (CAST5_context *) context;
   gcry_err_code_t rc = do_cast_setkey (c, key, keylen);
+  (void)hd;
   return rc;
 }
 
diff -up libgcrypt-1.8.5/cipher/chacha20.c.aes-perf libgcrypt-1.8.5/cipher/chacha20.c
--- libgcrypt-1.8.5/cipher/chacha20.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/chacha20.c	2020-04-22 18:29:41.665862325 +0200
@@ -419,10 +419,12 @@ chacha20_do_setkey (CHACHA20_context_t *
 
 
 static gcry_err_code_t
-chacha20_setkey (void *context, const byte * key, unsigned int keylen)
+chacha20_setkey (void *context, const byte *key, unsigned int keylen,
+                 gcry_cipher_hd_t hd)
 {
   CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
   gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen);
+  (void)hd;
   _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
   return rc;
 }
@@ -569,7 +571,7 @@ selftest (void)
   /* 16-byte alignment required for amd64 implementation. */
   ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
 
-  chacha20_setkey (ctx, key_1, sizeof key_1);
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   scratch[sizeof (scratch) - 1] = 0;
   chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
@@ -577,7 +579,7 @@ selftest (void)
     return "ChaCha20 encryption test 1 failed.";
   if (scratch[sizeof (scratch) - 1])
     return "ChaCha20 wrote too much.";
-  chacha20_setkey (ctx, key_1, sizeof (key_1));
+  chacha20_setkey (ctx, key_1, sizeof (key_1), NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
   if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
@@ -585,12 +587,12 @@ selftest (void)
 
   for (i = 0; i < sizeof buf; i++)
     buf[i] = i;
-  chacha20_setkey (ctx, key_1, sizeof key_1);
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   /*encrypt */
   chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
   /*decrypt */
-  chacha20_setkey (ctx, key_1, sizeof key_1);
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   chacha20_encrypt_stream (ctx, buf, buf, 1);
   chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1);
@@ -600,13 +602,13 @@ selftest (void)
     if (buf[i] != (byte) i)
       return "ChaCha20 encryption test 2 failed.";
 
-  chacha20_setkey (ctx, key_1, sizeof key_1);
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   /* encrypt */
   for (i = 0; i < sizeof buf; i++)
     chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1);
   /* decrypt */
-  chacha20_setkey (ctx, key_1, sizeof key_1);
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
   chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
   chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
   for (i = 0; i < sizeof buf; i++)
diff -up libgcrypt-1.8.5/cipher/cipher-aeswrap.c.aes-perf libgcrypt-1.8.5/cipher/cipher-aeswrap.c
--- libgcrypt-1.8.5/cipher/cipher-aeswrap.c.aes-perf	2018-04-17 17:49:00.000000000 +0200
+++ libgcrypt-1.8.5/cipher/cipher-aeswrap.c	2020-04-22 18:29:41.665862325 +0200
@@ -99,7 +99,7 @@ _gcry_cipher_aeswrap_encrypt (gcry_ciphe
 		break;
 	    }
           /* A := MSB_64(B) ^ t */
-	  buf_xor(a, b, t, 8);
+	  cipher_block_xor(a, b, t, 8);
           /* R[i] := LSB_64(B) */
           memcpy (r+i*8, b+8, 8);
         }
@@ -170,7 +170,7 @@ _gcry_cipher_aeswrap_decrypt (gcry_ciphe
       for (i = n; i >= 1; i--)
         {
           /* B := AES_k^1( (A ^ t)| R[i] ) */
-	  buf_xor(b, a, t, 8);
+	  cipher_block_xor(b, a, t, 8);
           memcpy (b+8, r+(i-1)*8, 8);
           nburn = c->spec->decrypt (&c->context.c, b, b);
           burn = nburn > burn ? nburn : burn;
diff -up libgcrypt-1.8.5/cipher/cipher.c.aes-perf libgcrypt-1.8.5/cipher/cipher.c
--- libgcrypt-1.8.5/cipher/cipher.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher.c	2020-04-22 18:29:41.666862306 +0200
@@ -92,6 +92,8 @@ static gcry_cipher_spec_t *cipher_list[]
 
 
 
+static void _gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode);
+
 
 static int
 map_algo (int algo)
@@ -532,6 +534,7 @@ _gcry_cipher_open_internal (gcry_cipher_
               h->bulk.ctr_enc = _gcry_aes_ctr_enc;
               h->bulk.ocb_crypt = _gcry_aes_ocb_crypt;
               h->bulk.ocb_auth  = _gcry_aes_ocb_auth;
+              h->bulk.xts_crypt = _gcry_aes_xts_crypt;
               break;
 #endif /*USE_AES*/
 #ifdef USE_BLOWFISH
@@ -592,6 +595,9 @@ _gcry_cipher_open_internal (gcry_cipher_
               break;
             }
 
+          /* Setup mode routines. */
+          _gcry_cipher_setup_mode_ops(h, mode);
+
           /* Setup defaults depending on the mode.  */
           switch (mode)
             {
@@ -609,8 +615,7 @@ _gcry_cipher_open_internal (gcry_cipher_
             default:
               break;
             }
-
-	}
+        }
     }
 
   /* Done.  */
@@ -675,7 +680,7 @@ cipher_setkey (gcry_cipher_hd_t c, byte
 	}
     }
 
-  rc = c->spec->setkey (&c->context.c, key, keylen);
+  rc = c->spec->setkey (&c->context.c, key, keylen, c);
   if (!rc)
     {
       /* Duplicate initial context.  */
@@ -701,7 +706,7 @@ cipher_setkey (gcry_cipher_hd_t c, byte
 	case GCRY_CIPHER_MODE_XTS:
 	  /* Setup tweak cipher with second part of XTS key. */
 	  rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen,
-				keylen);
+				keylen, c);
 	  if (!rc)
 	    {
 	      /* Duplicate initial tweak context.  */
@@ -872,85 +877,78 @@ do_ecb_decrypt (gcry_cipher_hd_t c,
 }
 
 
-/****************
- * Encrypt INBUF to OUTBUF with the mode selected at open.
- * inbuf and outbuf may overlap or be the same.
- * Depending on the mode some constraints apply to INBUFLEN.
- */
 static gcry_err_code_t
-cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
-		const byte *inbuf, size_t inbuflen)
+do_stream_encrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
+{
+  (void)outbuflen;
+  c->spec->stencrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
+  return 0;
+}
+
+static gcry_err_code_t
+do_stream_decrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
+{
+  (void)outbuflen;
+  c->spec->stdecrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
+  return 0;
+}
+
+
+static gcry_err_code_t
+do_encrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                         const byte *inbuf, size_t inbuflen)
 {
   gcry_err_code_t rc;
 
-  if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key)
-    {
-      log_error ("cipher_encrypt: key not set\n");
-      return GPG_ERR_MISSING_KEY;
-    }
+  (void)outbuflen;
 
   switch (c->mode)
     {
-    case GCRY_CIPHER_MODE_ECB:
-      rc = do_ecb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CBC:
-      rc = _gcry_cipher_cbc_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CFB:
-      rc = _gcry_cipher_cfb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_CMAC:
+      rc = GPG_ERR_INV_CIPHER_MODE;
       break;
 
-    case GCRY_CIPHER_MODE_CFB8:
-      rc = _gcry_cipher_cfb8_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_NONE:
+      if (fips_mode () || !_gcry_get_debug_flag (0))
+        {
+          fips_signal_error ("cipher mode NONE used");
+          rc = GPG_ERR_INV_CIPHER_MODE;
+        }
+      else
+        {
+          if (inbuf != outbuf)
+            memmove (outbuf, inbuf, inbuflen);
+          rc = 0;
+        }
       break;
 
-    case GCRY_CIPHER_MODE_OFB:
-      rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    default:
+      log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode );
+      rc = GPG_ERR_INV_CIPHER_MODE;
       break;
+    }
 
-    case GCRY_CIPHER_MODE_CTR:
-      rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
+  return rc;
+}
 
-    case GCRY_CIPHER_MODE_AESWRAP:
-      rc = _gcry_cipher_aeswrap_encrypt (c, outbuf, outbuflen,
-                                         inbuf, inbuflen);
-      break;
+static gcry_err_code_t
+do_decrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                         const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t rc;
 
-    case GCRY_CIPHER_MODE_CCM:
-      rc = _gcry_cipher_ccm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
+  (void)outbuflen;
 
+  switch (c->mode)
+    {
     case GCRY_CIPHER_MODE_CMAC:
       rc = GPG_ERR_INV_CIPHER_MODE;
       break;
 
-    case GCRY_CIPHER_MODE_GCM:
-      rc = _gcry_cipher_gcm_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_POLY1305:
-      rc = _gcry_cipher_poly1305_encrypt (c, outbuf, outbuflen,
-					  inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_OCB:
-      rc = _gcry_cipher_ocb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_XTS:
-      rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1);
-      break;
-
-    case GCRY_CIPHER_MODE_STREAM:
-      c->spec->stencrypt (&c->context.c,
-                          outbuf, (byte*)/*arggg*/inbuf, inbuflen);
-      rc = 0;
-      break;
-
     case GCRY_CIPHER_MODE_NONE:
       if (fips_mode () || !_gcry_get_debug_flag (0))
         {
@@ -966,7 +964,7 @@ cipher_encrypt (gcry_cipher_hd_t c, byte
       break;
 
     default:
-      log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode );
+      log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode );
       rc = GPG_ERR_INV_CIPHER_MODE;
       break;
     }
@@ -991,7 +989,13 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h
       inlen = outsize;
     }
 
-  rc = cipher_encrypt (h, out, outsize, in, inlen);
+  if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
+    {
+      log_error ("cipher_decrypt: key not set\n");
+      return GPG_ERR_MISSING_KEY;
+    }
+
+  rc = h->mode_ops.encrypt (h, out, outsize, in, inlen);
 
   /* Failsafe: Make sure that the plaintext will never make it into
      OUT if the encryption returned an error.  */
@@ -1002,110 +1006,10 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h
 }
 
 
-
 /****************
- * Decrypt INBUF to OUTBUF with the mode selected at open.
- * inbuf and outbuf may overlap or be the same.
- * Depending on the mode some some constraints apply to INBUFLEN.
+ * Decrypt IN and write it to OUT.  If IN is NULL, in-place encryption has
+ * been requested.
  */
-static gcry_err_code_t
-cipher_decrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
-                const byte *inbuf, size_t inbuflen)
-{
-  gcry_err_code_t rc;
-
-  if (c->mode != GCRY_CIPHER_MODE_NONE && !c->marks.key)
-    {
-      log_error ("cipher_decrypt: key not set\n");
-      return GPG_ERR_MISSING_KEY;
-    }
-
-  switch (c->mode)
-    {
-    case GCRY_CIPHER_MODE_ECB:
-      rc = do_ecb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CBC:
-      rc = _gcry_cipher_cbc_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CFB:
-      rc = _gcry_cipher_cfb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CFB8:
-      rc = _gcry_cipher_cfb8_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_OFB:
-      rc = _gcry_cipher_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CTR:
-      rc = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_AESWRAP:
-      rc = _gcry_cipher_aeswrap_decrypt (c, outbuf, outbuflen,
-                                         inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CCM:
-      rc = _gcry_cipher_ccm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CMAC:
-      rc = GPG_ERR_INV_CIPHER_MODE;
-      break;
-
-    case GCRY_CIPHER_MODE_GCM:
-      rc = _gcry_cipher_gcm_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_POLY1305:
-      rc = _gcry_cipher_poly1305_decrypt (c, outbuf, outbuflen,
-					  inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_OCB:
-      rc = _gcry_cipher_ocb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_XTS:
-      rc = _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0);
-      break;
-
-    case GCRY_CIPHER_MODE_STREAM:
-      c->spec->stdecrypt (&c->context.c,
-                          outbuf, (byte*)/*arggg*/inbuf, inbuflen);
-      rc = 0;
-      break;
-
-    case GCRY_CIPHER_MODE_NONE:
-      if (fips_mode () || !_gcry_get_debug_flag (0))
-        {
-          fips_signal_error ("cipher mode NONE used");
-          rc = GPG_ERR_INV_CIPHER_MODE;
-        }
-      else
-        {
-          if (inbuf != outbuf)
-            memmove (outbuf, inbuf, inbuflen);
-          rc = 0;
-        }
-      break;
-
-    default:
-      log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode );
-      rc = GPG_ERR_INV_CIPHER_MODE;
-      break;
-    }
-
-  return rc;
-}
-
-
 gcry_err_code_t
 _gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
                       const void *in, size_t inlen)
@@ -1116,9 +1020,14 @@ _gcry_cipher_decrypt (gcry_cipher_hd_t h
       inlen = outsize;
     }
 
-  return cipher_decrypt (h, out, outsize, in, inlen);
-}
+  if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
+    {
+      log_error ("cipher_decrypt: key not set\n");
+      return GPG_ERR_MISSING_KEY;
+    }
 
+  return h->mode_ops.decrypt (h, out, outsize, in, inlen);
+}
 
 
 /****************
@@ -1149,33 +1058,10 @@ _gcry_cipher_setkey (gcry_cipher_hd_t hd
 gcry_err_code_t
 _gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen)
 {
-  gcry_err_code_t rc = 0;
-
-  switch (hd->mode)
-    {
-      case GCRY_CIPHER_MODE_CCM:
-        rc = _gcry_cipher_ccm_set_nonce (hd, iv, ivlen);
-        break;
-
-      case GCRY_CIPHER_MODE_GCM:
-        rc =  _gcry_cipher_gcm_setiv (hd, iv, ivlen);
-        break;
-
-      case GCRY_CIPHER_MODE_POLY1305:
-        rc =  _gcry_cipher_poly1305_setiv (hd, iv, ivlen);
-        break;
-
-      case GCRY_CIPHER_MODE_OCB:
-        rc = _gcry_cipher_ocb_set_nonce (hd, iv, ivlen);
-        break;
-
-      default:
-        rc = cipher_setiv (hd, iv, ivlen);
-        break;
-    }
-  return rc;
+  return hd->mode_ops.setiv (hd, iv, ivlen);
 }
 
+
 /* Set counter for CTR mode.  (CTR,CTRLEN) must denote a buffer of
    block size length, or (NULL,0) to set the CTR to the all-zero
    block. */
@@ -1209,38 +1095,40 @@ _gcry_cipher_getctr (gcry_cipher_hd_t hd
   return 0;
 }
 
+
 gcry_err_code_t
 _gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf,
                            size_t abuflen)
 {
   gcry_err_code_t rc;
 
-  switch (hd->mode)
+  if (hd->mode_ops.authenticate)
     {
-    case GCRY_CIPHER_MODE_CCM:
-      rc = _gcry_cipher_ccm_authenticate (hd, abuf, abuflen);
-      break;
-
-    case GCRY_CIPHER_MODE_CMAC:
-      rc = _gcry_cipher_cmac_authenticate (hd, abuf, abuflen);
-      break;
+      rc = hd->mode_ops.authenticate (hd, abuf, abuflen);
+    }
+  else
+    {
+      log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+    }
 
-    case GCRY_CIPHER_MODE_GCM:
-      rc = _gcry_cipher_gcm_authenticate (hd, abuf, abuflen);
-      break;
+  return rc;
+}
 
-    case GCRY_CIPHER_MODE_POLY1305:
-      rc = _gcry_cipher_poly1305_authenticate (hd, abuf, abuflen);
-      break;
 
-    case GCRY_CIPHER_MODE_OCB:
-      rc = _gcry_cipher_ocb_authenticate (hd, abuf, abuflen);
-      break;
+gcry_err_code_t
+_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen)
+{
+  gcry_err_code_t rc;
 
-    default:
-      log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode);
+  if (hd->mode_ops.get_tag)
+    {
+      rc = hd->mode_ops.get_tag (hd, outtag, taglen);
+    }
+  else
+    {
+      log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode);
       rc = GPG_ERR_INV_CIPHER_MODE;
-      break;
     }
 
   return rc;
@@ -1248,76 +1136,166 @@ _gcry_cipher_authenticate (gcry_cipher_h
 
 
 gcry_err_code_t
-_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen)
+_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen)
 {
   gcry_err_code_t rc;
 
-  switch (hd->mode)
+  if (hd->mode_ops.check_tag)
     {
-    case GCRY_CIPHER_MODE_CCM:
-      rc = _gcry_cipher_ccm_get_tag (hd, outtag, taglen);
+      rc = hd->mode_ops.check_tag (hd, intag, taglen);
+    }
+  else
+    {
+      log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+    }
+
+  return rc;
+}
+
+
+
+static void
+_gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode)
+{
+  /* Setup encryption and decryption routines. */
+  switch (mode)
+    {
+    case GCRY_CIPHER_MODE_STREAM:
+      c->mode_ops.encrypt = do_stream_encrypt;
+      c->mode_ops.decrypt = do_stream_decrypt;
       break;
 
-    case GCRY_CIPHER_MODE_CMAC:
-      rc = _gcry_cipher_cmac_get_tag (hd, outtag, taglen);
+    case GCRY_CIPHER_MODE_ECB:
+      c->mode_ops.encrypt = do_ecb_encrypt;
+      c->mode_ops.decrypt = do_ecb_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CBC:
+      c->mode_ops.encrypt = _gcry_cipher_cbc_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_cbc_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CFB:
+      c->mode_ops.encrypt = _gcry_cipher_cfb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_cfb_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CFB8:
+      c->mode_ops.encrypt = _gcry_cipher_cfb8_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_cfb8_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_OFB:
+      c->mode_ops.encrypt = _gcry_cipher_ofb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ofb_encrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CTR:
+      c->mode_ops.encrypt = _gcry_cipher_ctr_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ctr_encrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_AESWRAP:
+      c->mode_ops.encrypt = _gcry_cipher_aeswrap_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_aeswrap_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CCM:
+      c->mode_ops.encrypt = _gcry_cipher_ccm_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ccm_decrypt;
       break;
 
     case GCRY_CIPHER_MODE_GCM:
-      rc = _gcry_cipher_gcm_get_tag (hd, outtag, taglen);
+      c->mode_ops.encrypt = _gcry_cipher_gcm_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_gcm_decrypt;
       break;
 
     case GCRY_CIPHER_MODE_POLY1305:
-      rc = _gcry_cipher_poly1305_get_tag (hd, outtag, taglen);
+      c->mode_ops.encrypt = _gcry_cipher_poly1305_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_poly1305_decrypt;
       break;
 
     case GCRY_CIPHER_MODE_OCB:
-      rc = _gcry_cipher_ocb_get_tag (hd, outtag, taglen);
+      c->mode_ops.encrypt = _gcry_cipher_ocb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ocb_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_XTS:
+      c->mode_ops.encrypt = _gcry_cipher_xts_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_xts_decrypt;
       break;
 
     default:
-      log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode);
-      rc = GPG_ERR_INV_CIPHER_MODE;
+      c->mode_ops.encrypt = do_encrypt_none_unknown;
+      c->mode_ops.decrypt = do_decrypt_none_unknown;
       break;
     }
 
-  return rc;
-}
+  /* Setup IV setting routine. */
+  switch (mode)
+    {
+    case GCRY_CIPHER_MODE_CCM:
+      c->mode_ops.setiv = _gcry_cipher_ccm_set_nonce;
+      break;
+
+    case GCRY_CIPHER_MODE_GCM:
+      c->mode_ops.setiv =  _gcry_cipher_gcm_setiv;
+      break;
 
+    case GCRY_CIPHER_MODE_POLY1305:
+      c->mode_ops.setiv = _gcry_cipher_poly1305_setiv;
+      break;
 
-gcry_err_code_t
-_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen)
-{
-  gcry_err_code_t rc;
+    case GCRY_CIPHER_MODE_OCB:
+      c->mode_ops.setiv = _gcry_cipher_ocb_set_nonce;
+      break;
 
-  switch (hd->mode)
+    default:
+      c->mode_ops.setiv = cipher_setiv;
+      break;
+    }
+
+
+  /* Setup authentication routines for AEAD modes. */
+  switch (mode)
     {
     case GCRY_CIPHER_MODE_CCM:
-      rc = _gcry_cipher_ccm_check_tag (hd, intag, taglen);
+      c->mode_ops.authenticate = _gcry_cipher_ccm_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_ccm_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_ccm_check_tag;
       break;
 
     case GCRY_CIPHER_MODE_CMAC:
-      rc = _gcry_cipher_cmac_check_tag (hd, intag, taglen);
+      c->mode_ops.authenticate = _gcry_cipher_cmac_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_cmac_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_cmac_check_tag;
       break;
 
     case GCRY_CIPHER_MODE_GCM:
-      rc = _gcry_cipher_gcm_check_tag (hd, intag, taglen);
+      c->mode_ops.authenticate = _gcry_cipher_gcm_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_gcm_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_gcm_check_tag;
       break;
 
     case GCRY_CIPHER_MODE_POLY1305:
-      rc = _gcry_cipher_poly1305_check_tag (hd, intag, taglen);
+      c->mode_ops.authenticate = _gcry_cipher_poly1305_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_poly1305_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_poly1305_check_tag;
       break;
 
     case GCRY_CIPHER_MODE_OCB:
-      rc = _gcry_cipher_ocb_check_tag (hd, intag, taglen);
+      c->mode_ops.authenticate = _gcry_cipher_ocb_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_ocb_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_ocb_check_tag;
       break;
 
     default:
-      log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode);
-      rc = GPG_ERR_INV_CIPHER_MODE;
+      c->mode_ops.authenticate = NULL;
+      c->mode_ops.get_tag      = NULL;
+      c->mode_ops.check_tag    = NULL;
       break;
     }
-
-  return rc;
 }
 
 
diff -up libgcrypt-1.8.5/cipher/cipher-cbc.c.aes-perf libgcrypt-1.8.5/cipher/cipher-cbc.c
--- libgcrypt-1.8.5/cipher/cipher-cbc.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-cbc.c	2020-04-22 18:29:41.666862306 +0200
@@ -79,7 +79,7 @@ _gcry_cipher_cbc_encrypt (gcry_cipher_hd
 
       for (n=0; n < nblocks; n++ )
         {
-          buf_xor (outbuf, inbuf, ivp, blocksize);
+          cipher_block_xor (outbuf, inbuf, ivp, blocksize);
           nburn = enc_fn ( &c->context.c, outbuf, outbuf );
           burn = nburn > burn ? nburn : burn;
           ivp = outbuf;
@@ -116,7 +116,7 @@ _gcry_cipher_cbc_encrypt (gcry_cipher_hd
 
       nburn = enc_fn (&c->context.c, outbuf, outbuf);
       burn = nburn > burn ? nburn : burn;
-      buf_cpy (c->u_iv.iv, outbuf, blocksize);
+      cipher_block_cpy (c->u_iv.iv, outbuf, blocksize);
     }
 
   if (burn > 0)
@@ -158,7 +158,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd
       nblocks--;
       if ((inbuflen % blocksize) == 0)
 	nblocks--;
-      buf_cpy (c->lastiv, c->u_iv.iv, blocksize);
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize);
     }
 
   if (c->bulk.cbc_dec)
@@ -176,7 +176,8 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd
              storage here because it is not used otherwise.  */
           nburn = dec_fn ( &c->context.c, c->lastiv, inbuf );
           burn = nburn > burn ? nburn : burn;
-          buf_xor_n_copy_2(outbuf, c->lastiv, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf,
+                                     blocksize);
           inbuf  += blocksize;
           outbuf += blocksize;
         }
@@ -191,7 +192,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd
       else
         restbytes = inbuflen % blocksize;
 
-      buf_cpy (c->lastiv, c->u_iv.iv, blocksize );         /* Save Cn-2. */
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */
       buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */
 
       nburn = dec_fn ( &c->context.c, outbuf, inbuf );
@@ -203,7 +204,7 @@ _gcry_cipher_cbc_decrypt (gcry_cipher_hd
         c->u_iv.iv[i] = outbuf[i];
       nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv);
       burn = nburn > burn ? nburn : burn;
-      buf_xor(outbuf, outbuf, c->lastiv, blocksize);
+      cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize);
       /* c->lastiv is now really lastlastiv, does this matter? */
     }
 
diff -up libgcrypt-1.8.5/cipher/cipher-ccm.c.aes-perf libgcrypt-1.8.5/cipher/cipher-ccm.c
--- libgcrypt-1.8.5/cipher/cipher-ccm.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-ccm.c	2020-04-22 18:29:41.666862306 +0200
@@ -67,7 +67,8 @@ do_cbc_mac (gcry_cipher_hd_t c, const un
       if (unused > 0)
         {
           /* Process one block from macbuf.  */
-          buf_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, blocksize);
+          cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf,
+                           blocksize);
           set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
 
           unused = 0;
@@ -86,7 +87,7 @@ do_cbc_mac (gcry_cipher_hd_t c, const un
         {
           while (inlen >= blocksize)
             {
-              buf_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
+              cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
 
               set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
 
@@ -272,7 +273,7 @@ _gcry_cipher_ccm_tag (gcry_cipher_hd_t c
       burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding.  */
 
       /* Add S_0 */
-      buf_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
+      cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
 
       wipememory (c->u_ctr.ctr, 16);
       wipememory (c->u_mode.ccm.s0, 16);
diff -up libgcrypt-1.8.5/cipher/cipher-cfb.c.aes-perf libgcrypt-1.8.5/cipher/cipher-cfb.c
--- libgcrypt-1.8.5/cipher/cipher-cfb.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-cfb.c	2020-04-22 18:29:41.667862287 +0200
@@ -91,7 +91,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd
           nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
           burn = nburn > burn ? nburn : burn;
           /* XOR the input with the IV and store input into IV.  */
-          buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
           outbuf += blocksize;
           inbuf += blocksize;
           inbuflen -= blocksize;
@@ -101,11 +101,11 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd
   if ( inbuflen >= blocksize )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       /* XOR the input with the IV and store input into IV */
-      buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
@@ -113,7 +113,7 @@ _gcry_cipher_cfb_encrypt (gcry_cipher_hd
   if ( inbuflen )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       c->unused = blocksize;
@@ -193,7 +193,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd
           nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
           burn = nburn > burn ? nburn : burn;
           /* XOR the input with the IV and store input into IV. */
-          buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+          cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
           outbuf += blocksize;
           inbuf += blocksize;
           inbuflen -= blocksize;
@@ -203,11 +203,11 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd
   if (inbuflen >= blocksize )
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy ( c->lastiv, c->u_iv.iv, blocksize);
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize);
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       /* XOR the input with the IV and store input into IV */
-      buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
@@ -216,7 +216,7 @@ _gcry_cipher_cfb_decrypt (gcry_cipher_hd
   if (inbuflen)
     {
       /* Save the current IV and then encrypt the IV. */
-      buf_cpy ( c->lastiv, c->u_iv.iv, blocksize );
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize );
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
       c->unused = blocksize;
diff -up libgcrypt-1.8.5/cipher/cipher-cmac.c.aes-perf libgcrypt-1.8.5/cipher/cipher-cmac.c
--- libgcrypt-1.8.5/cipher/cipher-cmac.c.aes-perf	2020-04-22 18:29:41.643862745 +0200
+++ libgcrypt-1.8.5/cipher/cipher-cmac.c	2020-04-22 18:29:41.667862287 +0200
@@ -63,7 +63,7 @@ cmac_write (gcry_cipher_hd_t c, const by
       for (; inlen && c->unused < blocksize; inlen--)
         c->lastiv[c->unused++] = *inbuf++;
 
-      buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize);
+      cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize);
       set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv));
 
       c->unused = 0;
@@ -83,7 +83,7 @@ cmac_write (gcry_cipher_hd_t c, const by
   else
     while (inlen > blocksize)
       {
-        buf_xor (c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
+        cipher_block_xor (c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
         set_burn (burn, enc_fn (&c->context.c, c->u_iv.iv, c->u_iv.iv));
         inlen -= blocksize;
         inbuf += blocksize;
@@ -174,9 +174,9 @@ cmac_final (gcry_cipher_hd_t c)
         c->lastiv[count++] = 0;
     }
 
-  buf_xor (c->lastiv, c->lastiv, subkey, blocksize);
+  cipher_block_xor (c->lastiv, c->lastiv, subkey, blocksize);
 
-  buf_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize);
+  cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->lastiv, blocksize);
   burn = c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_iv.iv);
   if (burn)
     _gcry_burn_stack (burn + 4 * sizeof (void *));
diff -up libgcrypt-1.8.5/cipher/cipher-ctr.c.aes-perf libgcrypt-1.8.5/cipher/cipher-ctr.c
--- libgcrypt-1.8.5/cipher/cipher-ctr.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-ctr.c	2020-04-22 18:29:41.667862287 +0200
@@ -81,24 +81,34 @@ _gcry_cipher_ctr_encrypt (gcry_cipher_hd
     {
       unsigned char tmp[MAX_BLOCKSIZE];
 
-      do {
-        nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
-        burn = nburn > burn ? nburn : burn;
-
-        for (i = blocksize; i > 0; i--)
-          {
-            c->u_ctr.ctr[i-1]++;
-            if (c->u_ctr.ctr[i-1] != 0)
-              break;
-          }
-
-        n = blocksize < inbuflen ? blocksize : inbuflen;
-        buf_xor(outbuf, inbuf, tmp, n);
-
-        inbuflen -= n;
-        outbuf += n;
-        inbuf += n;
-      } while (inbuflen);
+      do
+        {
+          nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr);
+          burn = nburn > burn ? nburn : burn;
+
+          for (i = blocksize; i > 0; i--)
+            {
+              c->u_ctr.ctr[i-1]++;
+              if (c->u_ctr.ctr[i-1] != 0)
+                break;
+            }
+
+          if (inbuflen < blocksize)
+            break;
+          n = blocksize;
+          cipher_block_xor(outbuf, inbuf, tmp, blocksize);
+
+          inbuflen -= n;
+          outbuf += n;
+          inbuf += n;
+        }
+      while (inbuflen);
+
+      if (inbuflen)
+        {
+          n = inbuflen;
+          buf_xor(outbuf, inbuf, tmp, inbuflen);
+        }
 
       /* Save the unused bytes of the counter.  */
       c->unused = blocksize - n;
diff -up libgcrypt-1.8.5/cipher/cipher-gcm.c.aes-perf libgcrypt-1.8.5/cipher/cipher-gcm.c
--- libgcrypt-1.8.5/cipher/cipher-gcm.c.aes-perf	2018-04-17 17:27:25.000000000 +0200
+++ libgcrypt-1.8.5/cipher/cipher-gcm.c	2020-04-22 18:29:41.667862287 +0200
@@ -150,7 +150,7 @@ do_ghash (unsigned char *result, const u
   u32 A;
   int i;
 
-  buf_xor (V, result, buf, 16);
+  cipher_block_xor (V, result, buf, 16);
   V[0] = be_bswap64 (V[0]);
   V[1] = be_bswap64 (V[1]);
 
@@ -259,7 +259,7 @@ do_ghash (unsigned char *result, const u
   u32 T[3];
   int i;
 
-  buf_xor (V, result, buf, 16); /* V is big-endian */
+  cipher_block_xor (V, result, buf, 16); /* V is big-endian */
 
   /* First round can be manually tweaked based on fact that 'tmp' is zero. */
   i = 15;
@@ -342,7 +342,7 @@ do_ghash (unsigned char *hsub, unsigned
 #else
   unsigned long T[4];
 
-  buf_xor (V, result, buf, 16);
+  cipher_block_xor (V, result, buf, 16);
   for (i = 0; i < 4; i++)
     {
       V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8;
@@ -358,7 +358,7 @@ do_ghash (unsigned char *hsub, unsigned
       for (j = 0x80; j; j >>= 1)
         {
           if (hsub[i] & j)
-            buf_xor (p, p, V, 16);
+            cipher_block_xor (p, p, V, 16);
           if (bshift (V))
             V[0] ^= 0xe1000000;
         }
@@ -598,7 +598,7 @@ gcm_ctr_encrypt (gcry_cipher_hd_t c, byt
                 }
 
               fix_ctr = 1;
-              buf_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
+              cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
             }
         }
 
@@ -928,8 +928,8 @@ _gcry_cipher_gcm_tag (gcry_cipher_hd_t c
       /* Add bitlengths to tag. */
       do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths,
                    GCRY_GCM_BLOCK_LEN, 1);
-      buf_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
-               c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
+      cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
+                        c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
       c->marks.tag = 1;
 
       wipememory (bitlengths, sizeof (bitlengths));
diff -up libgcrypt-1.8.5/cipher/cipher-gcm-intel-pclmul.c.aes-perf libgcrypt-1.8.5/cipher/cipher-gcm-intel-pclmul.c
--- libgcrypt-1.8.5/cipher/cipher-gcm-intel-pclmul.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-gcm-intel-pclmul.c	2020-04-22 18:29:41.668862268 +0200
@@ -248,7 +248,8 @@ static inline void gfmul_pclmul_aggr4(vo
 void
 _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
 {
-  u64 tmp[2];
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
 #if defined(__x86_64__) && defined(__WIN64__)
   char win64tmp[3 * 16];
 
@@ -262,15 +263,19 @@ _gcry_ghash_setup_intel_pclmul (gcry_cip
 #endif
 
   /* Swap endianness of hsub. */
-  tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8);
-  tmp[1] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 0);
-  buf_cpy (c->u_mode.gcm.u_ghash_key.key, tmp, GCRY_GCM_BLOCK_LEN);
+  asm volatile ("movdqu (%[key]), %%xmm0\n\t"
+                "pshufb %[be_mask], %%xmm0\n\t"
+                "movdqu %%xmm0, (%[key])\n\t"
+                :
+                : [key] "r" (c->u_mode.gcm.u_ghash_key.key),
+                  [be_mask] "m" (*be_mask)
+                : "memory");
 
 #ifdef __x86_64__
-  asm volatile ("movdqu %[h_1], %%xmm0\n\t"
-                "movdqa %%xmm0, %%xmm1\n\t"
+  asm volatile ("movdqa %%xmm0, %%xmm1\n\t"
+                :
                 :
-                : [h_1] "m" (*tmp));
+                : "memory");
 
   gfmul_pclmul (); /* H•H => H² */
 
@@ -324,8 +329,6 @@ _gcry_ghash_setup_intel_pclmul (gcry_cip
                 ::: "cc" );
 #endif
 #endif
-
-  wipememory (tmp, sizeof(tmp));
 }
 
 
diff -up libgcrypt-1.8.5/cipher/cipher-internal.h.aes-perf libgcrypt-1.8.5/cipher/cipher-internal.h
--- libgcrypt-1.8.5/cipher/cipher-internal.h.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-internal.h	2020-04-22 18:29:41.668862268 +0200
@@ -121,6 +121,25 @@ struct gcry_cipher_handle
      interface does not easily allow to retrieve this value. */
   int algo;
 
+  /* A structure with function pointers for mode operations. */
+  struct {
+    gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c,
+                               unsigned char *outbuf, size_t outbuflen,
+                               const unsigned char *inbuf, size_t inbuflen);
+    gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c,
+                               unsigned char *outbuf, size_t outbuflen,
+                               const unsigned char *inbuf, size_t inbuflen);
+    gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv,
+                             size_t ivlen);
+
+    gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c,
+                                    const unsigned char *abuf, size_t abuflen);
+    gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag,
+                               size_t taglen);
+    gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag,
+                                 size_t taglen);
+  } mode_ops;
+
   /* A structure with function pointers for bulk operations.  Due to
      limitations of the module system (we don't want to change the
      API) we need to keep these function pointers here.  The cipher
@@ -146,7 +165,7 @@ struct gcry_cipher_handle
 			const void *inbuf_arg, size_t nblocks, int encrypt);
     size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg,
 		       size_t nblocks);
-    void (*xts_crypt)(gcry_cipher_hd_t c, unsigned char *tweak,
+    void (*xts_crypt)(void *context, unsigned char *tweak,
 		      void *outbuf_arg, const void *inbuf_arg,
 		      size_t nblocks, int encrypt);
   } bulk;
@@ -479,9 +498,12 @@ gcry_err_code_t _gcry_cipher_ocb_check_t
 
 
 /*-- cipher-xts.c --*/
-gcry_err_code_t _gcry_cipher_xts_crypt
+gcry_err_code_t _gcry_cipher_xts_encrypt
 /*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
-		 const unsigned char *inbuf, size_t inbuflen, int encrypt);
+		 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_xts_decrypt
+/*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
+		 const unsigned char *inbuf, size_t inbuflen);
 
 
 /* Return the L-value for block N.  Note: 'cipher_ocb.c' ensures that N
@@ -506,4 +528,145 @@ ocb_get_l (gcry_cipher_hd_t c, u64 n)
   return c->u_mode.ocb.L[ntz];
 }
 
+/* Optimized function for cipher block copying */
+static inline void
+cipher_block_cpy(void *_dst, const void *_src, size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      buf_put_he64(dst + 0, s[0]);
+      buf_put_he64(dst + 8, s[1]);
+    }
+}
+
+
+/* Optimized function for cipher block xoring */
+static inline void
+cipher_block_xor(void *_dst, const void *_src1, const void *_src2,
+                 size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+  u64 s1[2];
+  u64 s2[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s1[0] = buf_get_he64(src1 + 0);
+      s1[1] = buf_get_he64(src1 + 8);
+      s2[0] = buf_get_he64(src2 + 0);
+      s2[1] = buf_get_he64(src2 + 8);
+      buf_put_he64(dst + 0, s1[0] ^ s2[0]);
+      buf_put_he64(dst + 8, s1[1] ^ s2[1]);
+    }
+}
+
+
+/* Optimized function for in-place cipher block xoring */
+static inline void
+cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize)
+{
+  cipher_block_xor (_dst, _dst, _src, blocksize);
+}
+
+
+/* Optimized function for cipher block xoring with two destination cipher
+   blocks.  Used mainly by CFB mode encryption.  */
+static inline void
+cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src,
+                      size_t blocksize)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+  u64 d2[2];
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0);
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst1 + 0, d2[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      d2[0] = buf_get_he64(dst2 + 0);
+      d2[1] = buf_get_he64(dst2 + 8);
+      d2[0] = d2[0] ^ s[0];
+      d2[1] = d2[1] ^ s[1];
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst2 + 8, d2[1]);
+      buf_put_he64(dst1 + 0, d2[0]);
+      buf_put_he64(dst1 + 8, d2[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used by mainly CBC mode decryption.  */
+static inline void
+cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor,
+                          void *_srcdst_cpy, const void *_src_cpy,
+                          size_t blocksize)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+  u64 sc[2];
+  u64 sx[2];
+  u64 sdc[2];
+
+  if (blocksize == 8)
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      buf_put_he64(dst_xor + 0,
+                   buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0));
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      sc[1] = buf_get_he64(src_cpy + 8);
+      sx[0] = buf_get_he64(src_xor + 0);
+      sx[1] = buf_get_he64(src_xor + 8);
+      sdc[0] = buf_get_he64(srcdst_cpy + 0);
+      sdc[1] = buf_get_he64(srcdst_cpy + 8);
+      sx[0] ^= sdc[0];
+      sx[1] ^= sdc[1];
+      buf_put_he64(dst_xor + 0, sx[0]);
+      buf_put_he64(dst_xor + 8, sx[1]);
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+      buf_put_he64(srcdst_cpy + 8, sc[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used by mainly CFB mode decryption.  */
+static inline void
+cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
+                        size_t blocksize)
+{
+  cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
+}
+
+
 #endif /*G10_CIPHER_INTERNAL_H*/
diff -up libgcrypt-1.8.5/cipher/cipher-ocb.c.aes-perf libgcrypt-1.8.5/cipher/cipher-ocb.c
--- libgcrypt-1.8.5/cipher/cipher-ocb.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-ocb.c	2020-04-22 18:29:41.668862268 +0200
@@ -82,7 +82,7 @@ static void
 double_block_cpy (unsigned char *d, const unsigned char *s)
 {
   if (d != s)
-    buf_cpy (d, s, OCB_BLOCK_LEN);
+    cipher_block_cpy (d, s, OCB_BLOCK_LEN);
   double_block (d);
 }
 
@@ -181,8 +181,8 @@ _gcry_cipher_ocb_set_nonce (gcry_cipher_
   nburn = c->spec->encrypt (&c->context.c, ktop, ktop);
   burn = nburn > burn ? nburn : burn;
   /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
-  buf_cpy (stretch, ktop, OCB_BLOCK_LEN);
-  buf_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
+  cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN);
+  cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
   /* Offset_0 = Stretch[1+bottom..128+bottom]
      (We use the IV field to store the offset) */
   bit_copy (c->u_iv.iv, stretch, bottom, OCB_BLOCK_LEN);
@@ -267,18 +267,18 @@ _gcry_cipher_ocb_authenticate (gcry_ciph
             }
           else
             {
-              buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
-                       OCB_BLOCK_LEN);
+              cipher_block_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                                OCB_BLOCK_LEN);
             }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset,
-                   c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset,
+                            c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           c->u_mode.ocb.aad_nleftover = 0;
         }
@@ -309,12 +309,13 @@ _gcry_cipher_ocb_authenticate (gcry_ciph
           ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           abuf += OCB_BLOCK_LEN;
           abuflen -= OCB_BLOCK_LEN;
@@ -349,14 +350,15 @@ _gcry_cipher_ocb_authenticate (gcry_ciph
           gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset,
-                     ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
-                     OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                              ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                              OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
           nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           abuf += OCB_BLOCK_LEN;
           abuflen -= OCB_BLOCK_LEN;
@@ -397,18 +399,18 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
   if (c->u_mode.ocb.aad_nleftover)
     {
       /* Offset_* = Offset_m xor L_*  */
-      buf_xor_1 (c->u_mode.ocb.aad_offset,
-                 c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                          c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
       /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_*  */
       buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
       memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
               OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
       l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
-      buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
       /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
       nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
       burn = nburn > burn ? nburn : burn;
-      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
       c->u_mode.ocb.aad_nleftover = 0;
     }
@@ -431,7 +433,7 @@ ocb_checksum (unsigned char *chksum, con
   while (nblks > 0)
     {
       /* Checksum_i = Checksum_{i-1} xor P_i  */
-      buf_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+      cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
 
       plainbuf += OCB_BLOCK_LEN;
       nblks--;
@@ -491,12 +493,12 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
             }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
           nburn = crypt_fn (&c->context.c, outbuf, outbuf);
           burn = nburn > burn ? nburn : burn;
-          buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
           if (!encrypt)
             {
@@ -551,14 +553,14 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
               gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
 
               /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-              buf_xor_1 (c->u_iv.iv,
-                         ocb_get_l (c, c->u_mode.ocb.data_nblocks),
-                         OCB_BLOCK_LEN);
+              cipher_block_xor_1 (c->u_iv.iv,
+                                  ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                                  OCB_BLOCK_LEN);
               /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-              buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
               nburn = crypt_fn (&c->context.c, outbuf, outbuf);
               burn = nburn > burn ? nburn : burn;
-              buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+              cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
               inbuf += OCB_BLOCK_LEN;
               inbuflen -= OCB_BLOCK_LEN;
@@ -584,7 +586,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
       unsigned char pad[OCB_BLOCK_LEN];
 
       /* Offset_* = Offset_m xor L_*  */
-      buf_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
       /* Pad = ENCIPHER(K, Offset_*) */
       nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
       burn = nburn > burn ? nburn : burn;
@@ -596,7 +598,7 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
           buf_cpy (l_tmp, inbuf, inbuflen);
           memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
           l_tmp[inbuflen] = 0x80;
-          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
           /* C_* = P_* xor Pad[1..bitlen(P_*)] */
           buf_xor (outbuf, inbuf, pad, inbuflen);
         }
@@ -604,13 +606,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
         {
           /* P_* = C_* xor Pad[1..bitlen(C_*)] */
           /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
-          buf_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+          cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN);
           buf_cpy (l_tmp, inbuf, inbuflen);
-          buf_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
           l_tmp[inbuflen] = 0x80;
           buf_cpy (outbuf, l_tmp, inbuflen);
 
-          buf_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
         }
     }
 
@@ -618,8 +620,10 @@ ocb_crypt (gcry_cipher_hd_t c, int encry
   if (c->marks.finalize)
     {
       /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
-      buf_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv, OCB_BLOCK_LEN);
-      buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN);
+      cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv,
+                        OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar,
+                          OCB_BLOCK_LEN);
       nburn = c->spec->encrypt (&c->context.c,
                                 c->u_mode.ocb.tag, c->u_mode.ocb.tag);
       burn = nburn > burn ? nburn : burn;
@@ -672,7 +676,8 @@ compute_tag_if_needed (gcry_cipher_hd_t
   if (!c->marks.tag)
     {
       ocb_aad_finalize (c);
-      buf_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum, OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum,
+                          OCB_BLOCK_LEN);
       c->marks.tag = 1;
     }
 }
diff -up libgcrypt-1.8.5/cipher/cipher-ofb.c.aes-perf libgcrypt-1.8.5/cipher/cipher-ofb.c
--- libgcrypt-1.8.5/cipher/cipher-ofb.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-ofb.c	2020-04-22 18:29:41.668862268 +0200
@@ -76,7 +76,7 @@ _gcry_cipher_ofb_encrypt (gcry_cipher_hd
       /* Encrypt the IV (and save the current one). */
       nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
       burn = nburn > burn ? nburn : burn;
-      buf_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
+      cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
       outbuf += blocksize;
       inbuf += blocksize;
       inbuflen -= blocksize;
diff -up libgcrypt-1.8.5/cipher/cipher-selftest.c.aes-perf libgcrypt-1.8.5/cipher/cipher-selftest.c
--- libgcrypt-1.8.5/cipher/cipher-selftest.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-selftest.c	2020-04-22 18:29:41.669862248 +0200
@@ -105,7 +105,7 @@ _gcry_selftest_helper_cbc (const char *c
   ciphertext = plaintext2 + nblocks * blocksize;
 
   /* Initialize ctx */
-  if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+  if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR)
    {
      xfree(mem);
      return "setkey failed";
@@ -228,7 +228,7 @@ _gcry_selftest_helper_cfb (const char *c
   ciphertext = plaintext2 + nblocks * blocksize;
 
   /* Initialize ctx */
-  if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+  if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR)
    {
      xfree(mem);
      return "setkey failed";
@@ -351,7 +351,7 @@ _gcry_selftest_helper_ctr (const char *c
   ciphertext2 = ciphertext + nblocks * blocksize;
 
   /* Initialize ctx */
-  if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+  if (setkey_func (ctx, key, sizeof(key), NULL) != GPG_ERR_NO_ERROR)
    {
      xfree(mem);
      return "setkey failed";
diff -up libgcrypt-1.8.5/cipher/cipher-xts.c.aes-perf libgcrypt-1.8.5/cipher/cipher-xts.c
--- libgcrypt-1.8.5/cipher/cipher-xts.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/cipher-xts.c	2020-04-22 18:29:41.669862248 +0200
@@ -93,7 +93,8 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t
   /* Use a bulk method if available.  */
   if (nblocks && c->bulk.xts_crypt)
     {
-      c->bulk.xts_crypt (c, c->u_ctr.ctr, outbuf, inbuf, nblocks, encrypt);
+      c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks,
+			 encrypt);
       inbuf  += nblocks * GCRY_XTS_BLOCK_LEN;
       outbuf += nblocks * GCRY_XTS_BLOCK_LEN;
       inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN;
@@ -106,10 +107,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t
   while (nblocks)
     {
       /* Xor-Encrypt/Decrypt-Xor block. */
-      buf_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
       nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
       burn = nburn > burn ? nburn : burn;
-      buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
 
       outbuf += GCRY_XTS_BLOCK_LEN;
       inbuf += GCRY_XTS_BLOCK_LEN;
@@ -132,10 +133,10 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t
 	  xts_gfmul_byA (tmp.x1, c->u_ctr.ctr);
 
 	  /* Decrypt last block first. */
-	  buf_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+	  cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
 	  nburn = crypt_fn (&c->context.c, outbuf, outbuf);
 	  burn = nburn > burn ? nburn : burn;
-	  buf_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+	  cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
 
 	  inbuflen -= GCRY_XTS_BLOCK_LEN;
 	  inbuf += GCRY_XTS_BLOCK_LEN;
@@ -146,15 +147,15 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t
       outbuf -= GCRY_XTS_BLOCK_LEN;
 
       /* Steal ciphertext from previous block. */
-      buf_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
+      cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
       buf_cpy (tmp.x64, inbuf, inbuflen);
       buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen);
 
       /* Decrypt/Encrypt last block. */
-      buf_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
       nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
       burn = nburn > burn ? nburn : burn;
-      buf_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
     }
 
   /* Auto-increment data-unit sequence number */
@@ -168,3 +169,21 @@ _gcry_cipher_xts_crypt (gcry_cipher_hd_t
 
   return 0;
 }
+
+
+gcry_err_code_t
+_gcry_cipher_xts_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0);
+}
diff -up libgcrypt-1.8.5/cipher/des.c.aes-perf libgcrypt-1.8.5/cipher/des.c
--- libgcrypt-1.8.5/cipher/des.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/des.c	2020-04-22 18:29:41.669862248 +0200
@@ -119,6 +119,7 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 
@@ -197,7 +198,8 @@ static unsigned int do_tripledes_encrypt
 static unsigned int do_tripledes_decrypt(void *context, byte *outbuf,
 					 const byte *inbuf );
 static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key,
-                                           unsigned keylen);
+                                           unsigned keylen,
+                                           gcry_cipher_hd_t hd);
 
 static int initialized;
 
@@ -940,7 +942,7 @@ _gcry_3des_ctr_enc(void *context, unsign
       /* Encrypt the counter. */
       tripledes_ecb_encrypt (ctx, ctr, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
       outbuf += DES_BLOCKSIZE;
       inbuf  += DES_BLOCKSIZE;
       /* Increment the counter.  */
@@ -996,7 +998,7 @@ _gcry_3des_cbc_dec(void *context, unsign
          the intermediate result to SAVEBUF.  */
       tripledes_ecb_decrypt (ctx, inbuf, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
       inbuf += DES_BLOCKSIZE;
       outbuf += DES_BLOCKSIZE;
     }
@@ -1041,7 +1043,7 @@ _gcry_3des_cfb_dec(void *context, unsign
   for ( ;nblocks; nblocks-- )
     {
       tripledes_ecb_encrypt (ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
       outbuf += DES_BLOCKSIZE;
       inbuf  += DES_BLOCKSIZE;
     }
@@ -1086,7 +1088,8 @@ is_weak_key ( const byte *key )
 
 /* Alternative setkey for selftests; need larger key than default. */
 static gcry_err_code_t
-bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen)
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen,
+                      gcry_cipher_hd_t hd)
 {
   static const unsigned char key[24] ATTR_ALIGNED_16 = {
       0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
@@ -1094,10 +1097,11 @@ bulk_selftest_setkey (void *context, con
       0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
     };
 
+  (void)hd;
   (void)__key;
   (void)__keylen;
 
-  return do_tripledes_setkey(context, key, sizeof(key));
+  return do_tripledes_setkey(context, key, sizeof(key), NULL);
 }
 
 
@@ -1349,10 +1353,13 @@ selftest (void)
 
 
 static gcry_err_code_t
-do_tripledes_setkey ( void *context, const byte *key, unsigned keylen )
+do_tripledes_setkey ( void *context, const byte *key, unsigned keylen,
+                      gcry_cipher_hd_t hd )
 {
   struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
 
+  (void)hd;
+
   if( keylen != 24 )
     return GPG_ERR_INV_KEYLEN;
 
@@ -1413,10 +1420,13 @@ do_tripledes_decrypt( void *context, byt
 }
 
 static gcry_err_code_t
-do_des_setkey (void *context, const byte *key, unsigned keylen)
+do_des_setkey (void *context, const byte *key, unsigned keylen,
+               gcry_cipher_hd_t hd)
 {
   struct _des_ctx *ctx = (struct _des_ctx *) context;
 
+  (void)hd;
+
   if (keylen != 8)
     return GPG_ERR_INV_KEYLEN;
 
diff -up libgcrypt-1.8.5/cipher/gost28147.c.aes-perf libgcrypt-1.8.5/cipher/gost28147.c
--- libgcrypt-1.8.5/cipher/gost28147.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/gost28147.c	2020-04-22 18:29:41.669862248 +0200
@@ -39,11 +39,14 @@
 #include "gost-sb.h"
 
 static gcry_err_code_t
-gost_setkey (void *c, const byte *key, unsigned keylen)
+gost_setkey (void *c, const byte *key, unsigned keylen,
+             gcry_cipher_hd_t hd)
 {
   int i;
   GOST28147_context *ctx = c;
 
+  (void)hd;
+
   if (keylen != 256 / 8)
     return GPG_ERR_INV_KEYLEN;
 
diff -up libgcrypt-1.8.5/cipher/idea.c.aes-perf libgcrypt-1.8.5/cipher/idea.c
--- libgcrypt-1.8.5/cipher/idea.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/idea.c	2020-04-22 18:29:41.670862229 +0200
@@ -258,10 +258,12 @@ do_setkey( IDEA_context *c, const byte *
 }
 
 static gcry_err_code_t
-idea_setkey (void *context, const byte *key, unsigned int keylen)
+idea_setkey (void *context, const byte *key, unsigned int keylen,
+             gcry_cipher_hd_t hd)
 {
     IDEA_context *ctx = context;
     int rc = do_setkey (ctx, key, keylen);
+    (void)hd;
     _gcry_burn_stack (23+6*sizeof(void*));
     return rc;
 }
diff -up libgcrypt-1.8.5/cipher/Makefile.am.aes-perf libgcrypt-1.8.5/cipher/Makefile.am
--- libgcrypt-1.8.5/cipher/Makefile.am.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/Makefile.am	2020-04-22 18:29:41.670862229 +0200
@@ -83,7 +83,8 @@ rijndael.c rijndael-internal.h rijndael-
   rijndael-padlock.c rijndael-amd64.S rijndael-arm.S \
   rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
   rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S rijndael-armv8-aarch64-ce.S \
-  rijndael-aarch64.S \
+  rijndael-aarch64.S rijndael-ppc.c rijndael-ppc9le.c \
+  rijndael-ppc-common.h rijndael-ppc-functions.h \
 rmd160.c \
 rsa.c \
 salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
@@ -128,3 +129,23 @@ tiger.o: $(srcdir)/tiger.c
 
 tiger.lo: $(srcdir)/tiger.c
 	`echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `
+
+if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS
+ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto
+else
+ppc_vcrypto_cflags =
+endif
+
+rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
+	`echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< `
+
+rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
+	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
+
+rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile
+	`echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< `
+
+rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile
+	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< `
+
+
diff -up libgcrypt-1.8.5/cipher/rfc2268.c.aes-perf libgcrypt-1.8.5/cipher/rfc2268.c
--- libgcrypt-1.8.5/cipher/rfc2268.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rfc2268.c	2020-04-22 18:29:41.670862229 +0200
@@ -262,8 +262,10 @@ setkey_core (void *context, const unsign
 }
 
 static gpg_err_code_t
-do_setkey (void *context, const unsigned char *key, unsigned int keylen)
+do_setkey (void *context, const unsigned char *key, unsigned int keylen,
+           gcry_cipher_hd_t hd)
 {
+  (void)hd;
   return setkey_core (context, key, keylen, 1);
 }
 
diff -up libgcrypt-1.8.5/cipher/rijndael-aesni.c.aes-perf libgcrypt-1.8.5/cipher/rijndael-aesni.c
--- libgcrypt-1.8.5/cipher/rijndael-aesni.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-aesni.c	2020-04-28 11:22:26.025519954 +0200
@@ -327,8 +327,8 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_cont
 
 
 /* Make a decryption key from an encryption key. */
-void
-_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+static inline void
+do_aesni_prepare_decryption (RIJNDAEL_context *ctx)
 {
   /* The AES-NI decrypt instructions use the Equivalent Inverse
      Cipher, thus we can't use the the standard decrypt key
@@ -338,8 +338,6 @@ _gcry_aes_aesni_prepare_decryption (RIJN
   int rr;
   int r;
 
-  aesni_prepare();
-
 #define DO_AESNI_AESIMC() \
   asm volatile ("movdqa %[ekey], %%xmm1\n\t" \
                 /*"aesimc %%xmm1, %%xmm1\n\t"*/ \
@@ -375,7 +373,13 @@ _gcry_aes_aesni_prepare_decryption (RIJN
   dkey[r] = ekey[0];
 
 #undef DO_AESNI_AESIMC
+}
 
+void
+_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  aesni_prepare();
+  do_aesni_prepare_decryption (ctx);
   aesni_cleanup();
 }
 
@@ -1023,8 +1027,8 @@ _gcry_aes_aesni_encrypt (const RIJNDAEL_
 
 
 void
-_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   aesni_prepare ();
@@ -1059,8 +1063,8 @@ _gcry_aes_aesni_cfb_enc (RIJNDAEL_contex
 
 
 void
-_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks, int cbc_mac)
 {
   aesni_prepare_2_6_variable;
@@ -1105,8 +1109,8 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_contex
 
 
 void
-_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *ctr,
+_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
@@ -1160,8 +1164,8 @@ _gcry_aes_aesni_decrypt (const RIJNDAEL_
 
 
 void
-_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   aesni_prepare_2_6_variable;
@@ -1245,15 +1249,21 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_contex
 
 
 void
-_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-			 const unsigned char *inbuf, unsigned char *iv,
-			 size_t nblocks)
+_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   aesni_prepare_2_6_variable;
 
   aesni_prepare ();
   aesni_prepare_2_6();
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   asm volatile
     ("movdqu %[iv], %%xmm5\n\t"	/* use xmm5 as fast IV storage */
      : /* No output */
@@ -1514,6 +1524,12 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void
   aesni_prepare ();
   aesni_prepare_2_6 ();
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
                 "movdqu %[ctr], %%xmm6\n\t"
@@ -1665,7 +1681,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void
 }
 
 
-void
+size_t
 _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
                           const void *inbuf_arg, size_t nblocks, int encrypt)
 {
@@ -1673,10 +1689,12 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd
     aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
   else
     aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+  return 0;
 }
 
 
-void
+size_t
 _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
@@ -1810,7 +1828,306 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
+
+  return 0;
 }
 
 
+static const u64 xts_gfmul_const[16] __attribute__ ((aligned (16))) =
+  { 0x87, 0x01 };
+
+
+static void
+_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak,
+			 unsigned char *outbuf, const unsigned char *inbuf,
+			 size_t nblocks)
+{
+  aesni_prepare_2_6_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_6 ();
+
+  /* Preload Tweak */
+  asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+		"movdqa %[gfmul], %%xmm6\n\t"
+		:
+		: [tweak] "m" (*tweak),
+		  [gfmul] "m" (*xts_gfmul_const)
+		: "memory" );
+
+  for ( ;nblocks >= 4; nblocks -= 4 )
+    {
+      asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm4\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * 16))
+		    : [inbuf0] "m" (*(inbuf + 0 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * 16))
+		    : [inbuf1] "m" (*(inbuf + 1 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * 16))
+		    : [inbuf2] "m" (*(inbuf + 2 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm5,    %[outbuf3]\n\t"
+
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf3] "=m" (*(outbuf + 3 * 16))
+		    : [inbuf3] "m" (*(inbuf + 3 * 16))
+		    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+                    "pxor   %%xmm0,     %%xmm1\n\t"
+		    "movdqu %[outbuf1], %%xmm0\n\t"
+		    "movdqu %%xmm1,     %[outbuf0]\n\t"
+		    "movdqu %[outbuf2], %%xmm1\n\t"
+                    "pxor   %%xmm0,     %%xmm2\n\t"
+		    "movdqu %[outbuf3], %%xmm0\n\t"
+                    "pxor   %%xmm1,     %%xmm3\n\t"
+                    "pxor   %%xmm0,     %%xmm4\n\t"
+		    "movdqu %%xmm2,     %[outbuf1]\n\t"
+		    "movdqu %%xmm3,     %[outbuf2]\n\t"
+		    "movdqu %%xmm4,     %[outbuf3]\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+		      [outbuf1] "+m" (*(outbuf + 1 * 16)),
+		      [outbuf2] "+m" (*(outbuf + 2 * 16)),
+		      [outbuf3] "+m" (*(outbuf + 3 * 16))
+		    :
+		    : "memory" );
+
+      outbuf += BLOCKSIZE * 4;
+      inbuf += BLOCKSIZE * 4;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf],  %%xmm0\n\t"
+		    "pxor   %%xmm5,    %%xmm0\n\t"
+		    "movdqa %%xmm5,    %%xmm4\n\t"
+
+		    "pshufd $0x13,     %%xmm5,  %%xmm1\n\t"
+		    "psrad  $31,       %%xmm1\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm1\n\t"
+		    "pxor   %%xmm1,    %%xmm5\n\t"
+		    :
+		    : [inbuf] "m" (*inbuf)
+		    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm4,    %%xmm0\n\t"
+		    "movdqu %%xmm0,    %[outbuf]\n\t"
+		    : [outbuf] "=m" (*outbuf)
+		    :
+		    : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+		: [tweak] "=m" (*tweak)
+		:
+		: "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_6 ();
+}
+
+
+static void
+_gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
+			 unsigned char *outbuf, const unsigned char *inbuf,
+			 size_t nblocks)
+{
+  aesni_prepare_2_6_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_6 ();
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  /* Preload Tweak */
+  asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+		"movdqa %[gfmul], %%xmm6\n\t"
+		:
+		: [tweak] "m" (*tweak),
+		  [gfmul] "m" (*xts_gfmul_const)
+		: "memory" );
+
+  for ( ;nblocks >= 4; nblocks -= 4 )
+    {
+      asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm4\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * 16))
+		    : [inbuf0] "m" (*(inbuf + 0 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * 16))
+		    : [inbuf1] "m" (*(inbuf + 1 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+
+		    "movdqa %%xmm4,    %%xmm0\n\t"
+		    "paddd  %%xmm4,    %%xmm4\n\t"
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * 16))
+		    : [inbuf2] "m" (*(inbuf + 2 * 16))
+		    : "memory" );
+
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm5,    %[outbuf3]\n\t"
+
+		    "psrad  $31,       %%xmm0\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    : [outbuf3] "=m" (*(outbuf + 3 * 16))
+		    : [inbuf3] "m" (*(inbuf + 3 * 16))
+		    : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+                    "pxor   %%xmm0,     %%xmm1\n\t"
+		    "movdqu %[outbuf1], %%xmm0\n\t"
+		    "movdqu %%xmm1,     %[outbuf0]\n\t"
+		    "movdqu %[outbuf2], %%xmm1\n\t"
+                    "pxor   %%xmm0,     %%xmm2\n\t"
+		    "movdqu %[outbuf3], %%xmm0\n\t"
+                    "pxor   %%xmm1,     %%xmm3\n\t"
+                    "pxor   %%xmm0,     %%xmm4\n\t"
+		    "movdqu %%xmm2,     %[outbuf1]\n\t"
+		    "movdqu %%xmm3,     %[outbuf2]\n\t"
+		    "movdqu %%xmm4,     %[outbuf3]\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+		      [outbuf1] "+m" (*(outbuf + 1 * 16)),
+		      [outbuf2] "+m" (*(outbuf + 2 * 16)),
+		      [outbuf3] "+m" (*(outbuf + 3 * 16))
+		    :
+		    : "memory" );
+
+      outbuf += BLOCKSIZE * 4;
+      inbuf += BLOCKSIZE * 4;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf],  %%xmm0\n\t"
+		    "pxor   %%xmm5,    %%xmm0\n\t"
+		    "movdqa %%xmm5,    %%xmm4\n\t"
+
+		    "pshufd $0x13,     %%xmm5,  %%xmm1\n\t"
+		    "psrad  $31,       %%xmm1\n\t"
+		    "paddq  %%xmm5,    %%xmm5\n\t"
+		    "pand   %%xmm6,    %%xmm1\n\t"
+		    "pxor   %%xmm1,    %%xmm5\n\t"
+		    :
+		    : [inbuf] "m" (*inbuf)
+		    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm4,    %%xmm0\n\t"
+		    "movdqu %%xmm0,    %[outbuf]\n\t"
+		    : [outbuf] "=m" (*outbuf)
+		    :
+		    : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+                : [tweak] "=m" (*tweak)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_6 ();
+}
+
+
+void
+_gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+			   unsigned char *outbuf, const unsigned char *inbuf,
+			   size_t nblocks, int encrypt)
+{
+  if (encrypt)
+    _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks);
+  else
+    _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks);
+}
+
 #endif /* USE_AESNI */
diff -up libgcrypt-1.8.5/cipher/rijndael-armv8-aarch32-ce.S.aes-perf libgcrypt-1.8.5/cipher/rijndael-armv8-aarch32-ce.S
--- libgcrypt-1.8.5/cipher/rijndael-armv8-aarch32-ce.S.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-armv8-aarch32-ce.S	2020-04-22 18:29:41.673862172 +0200
@@ -1517,6 +1517,317 @@ _gcry_aes_ocb_auth_armv8_ce:
 .size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;
 
 
+
+/*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+.type  _gcry_aes_xts_enc_armv8_ce,%function;
+_gcry_aes_xts_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lxts_enc_skip
+
+  cmp r5, #12
+
+  vld1.8 {q0}, [r3] /* load tweak */
+  mov r7, #0x87;
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lxts_enc_entry_192
+  bhi .Lxts_enc_entry_256
+
+#define CTR_XTS(bits, ...) \
+  .Lxts_enc_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lxts_enc_loop_##bits; \
+    \
+  .Lxts_enc_loop4_##bits: \
+    sub r4, r4, #4; \
+    veor q9, q9, q9; \
+    \
+    vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+    veor q1, q1, q0; \
+    cmp r4, #4; \
+    vmov.u32 d18[0], r7; \
+    vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+    veor q2, q2, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q3, q3, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q4, q4, q0; \
+    vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+    sub r1, r1, #48; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+    sub r1, r1, #32; \
+    veor q3, q3, q8; \
+    veor q4, q4, q9; \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lxts_enc_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lxts_enc_done; \
+    \
+  .Lxts_enc_loop_##bits: \
+    \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    \
+    veor q9, q9, q9; \
+    veor q1, q1, q0; \
+    vmov.u32 d18[0], r7; \
+    vmov q2, q0; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q2; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lxts_enc_loop_##bits; \
+    b .Lxts_enc_done;
+
+  CTR_XTS(128re, r0, r6)
+  CTR_XTS(192, r0, r6)
+  CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_enc_done:
+  vst1.8 {q0}, [r3] /* store tweak */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lxts_enc_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+.type  _gcry_aes_xts_dec_armv8_ce,%function;
+_gcry_aes_xts_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lxts_dec_skip
+
+  cmp r5, #12
+
+  vld1.8 {q0}, [r3] /* load tweak */
+  mov r7, #0x87;
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lxts_dec_entry_192
+  bhi .Lxts_dec_entry_256
+
+#define CTR_XTS(bits, ...) \
+  .Lxts_dec_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lxts_dec_loop_##bits; \
+    \
+  .Lxts_dec_loop4_##bits: \
+    sub r4, r4, #4; \
+    veor q9, q9, q9; \
+    \
+    vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+    veor q1, q1, q0; \
+    cmp r4, #4; \
+    vmov.u32 d18[0], r7; \
+    vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+    veor q2, q2, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q3, q3, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q4, q4, q0; \
+    vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+    sub r1, r1, #48; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+    sub r1, r1, #32; \
+    veor q3, q3, q8; \
+    veor q4, q4, q9; \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lxts_dec_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lxts_dec_done; \
+    \
+  .Lxts_dec_loop_##bits: \
+    \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    \
+    veor q9, q9, q9; \
+    veor q1, q1, q0; \
+    vmov.u32 d18[0], r7; \
+    vmov q2, q0; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q2; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lxts_dec_loop_##bits; \
+    b .Lxts_dec_done;
+
+  CTR_XTS(128re, r0, r6)
+  CTR_XTS(192, r0, r6)
+  CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_dec_done:
+  vst1.8 {q0}, [r3] /* store tweak */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lxts_dec_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;
+
+
 /*
  * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
  */
diff -up libgcrypt-1.8.5/cipher/rijndael-armv8-aarch64-ce.S.aes-perf libgcrypt-1.8.5/cipher/rijndael-armv8-aarch64-ce.S
--- libgcrypt-1.8.5/cipher/rijndael-armv8-aarch64-ce.S.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-armv8-aarch64-ce.S	2020-04-22 18:29:41.674862153 +0200
@@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce:
    *    w6: nrounds => w7
    *    w7: blkn => w12
    */
-  mov x12, x7
-  mov x7, x6
+  mov w12, w7
+  mov w7, w6
   mov x6, x5
   mov x5, x4
   mov x4, x3
@@ -1277,6 +1277,284 @@ _gcry_aes_ocb_auth_armv8_ce:
 
 
 /*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+.type  _gcry_aes_xts_enc_armv8_ce,%function;
+_gcry_aes_xts_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: tweak
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+
+  cbz x4, .Lxts_enc_skip
+
+  /* load tweak */
+  ld1 {v0.16b}, [x3]
+
+  /* load gfmul mask */
+  mov x6, #0x87
+  mov x7, #0x01
+  mov v16.D[0], x6
+  mov v16.D[1], x7
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lxts_enc_entry_192
+  b.hi .Lxts_enc_entry_256
+
+#define XTS_ENC(bits) \
+  .Lxts_enc_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lxts_enc_loop_##bits; \
+    \
+  .Lxts_enc_loop4_##bits: \
+    \
+    ext v4.16b, v0.16b, v0.16b, #8; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v5.2d, v0.2d, v0.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v5.16b, v5.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v6.2d, v5.2d, v5.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v6.16b, v6.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v7.2d, v6.2d, v6.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v7.16b, v7.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v3.2d, v7.2d, v7.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v3.16b, v3.16b, v2.16b; \
+    ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \
+    st1 {v3.16b}, [x3]; \
+    sub x4, x4, #4; \
+    eor v1.16b, v1.16b, v0.16b; \
+    \
+    ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \
+    cmp x4, #4; \
+    eor v2.16b, v2.16b, v5.16b; \
+    eor v3.16b, v3.16b, v6.16b; \
+    eor v4.16b, v4.16b, v7.16b; \
+    \
+    do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+    \
+    eor v1.16b, v1.16b, v0.16b; \
+    ld1 {v0.16b}, [x3]; \
+    eor v2.16b, v2.16b, v5.16b; \
+    eor v3.16b, v3.16b, v6.16b; \
+    eor v4.16b, v4.16b, v7.16b; \
+    st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lxts_enc_loop4_##bits; \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x4, .Lxts_enc_done; \
+    \
+  .Lxts_enc_loop_##bits: \
+    \
+    ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+    ext v3.16b, v0.16b, v0.16b, #8; \
+    mov v2.16b, v0.16b; \
+    sshr v3.2d, v3.2d, #63; \
+    add v0.2d, v0.2d, v0.2d; \
+    and v3.16b, v3.16b, v16.16b; \
+    eor v1.16b, v1.16b, v2.16b; \
+    eor v0.16b, v0.16b, v3.16b; \
+    sub x4, x4, #1; \
+    \
+    do_aes_one##bits(e, mc, v1, v1); \
+    \
+    eor v1.16b, v1.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+    \
+    cbnz x4, .Lxts_enc_loop_##bits; \
+    b .Lxts_enc_done;
+
+  XTS_ENC(128)
+  XTS_ENC(192)
+  XTS_ENC(256)
+
+#undef XTS_ENC
+
+.Lxts_enc_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store tweak */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+
+.Lxts_enc_skip:
+  ret
+
+.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+.type  _gcry_aes_xts_dec_armv8_ce,%function;
+_gcry_aes_xts_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: tweak
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+
+  cbz x4, .Lxts_dec_skip
+
+  /* load tweak */
+  ld1 {v0.16b}, [x3]
+
+  /* load gfmul mask */
+  mov x6, #0x87
+  mov x7, #0x01
+  mov v16.D[0], x6
+  mov v16.D[1], x7
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lxts_dec_entry_192
+  b.hi .Lxts_dec_entry_256
+
+#define XTS_DEC(bits) \
+  .Lxts_dec_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lxts_dec_loop_##bits; \
+    \
+  .Lxts_dec_loop4_##bits: \
+    \
+    ext v4.16b, v0.16b, v0.16b, #8; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v5.2d, v0.2d, v0.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v5.16b, v5.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v6.2d, v5.2d, v5.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v6.16b, v6.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v7.2d, v6.2d, v6.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v7.16b, v7.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v3.2d, v7.2d, v7.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v3.16b, v3.16b, v2.16b; \
+    ld1 {v1.16b-v2.16b}, [x2], #32; /* load plaintext */ \
+    st1 {v3.16b}, [x3]; \
+    sub x4, x4, #4; \
+    eor v1.16b, v1.16b, v0.16b; \
+    \
+    ld1 {v3.16b-v4.16b}, [x2], #32; /* load plaintext */ \
+    cmp x4, #4; \
+    eor v2.16b, v2.16b, v5.16b; \
+    eor v3.16b, v3.16b, v6.16b; \
+    eor v4.16b, v4.16b, v7.16b; \
+    \
+    do_aes_4_##bits(d, imc, v1, v2, v3, v4); \
+    \
+    eor v1.16b, v1.16b, v0.16b; \
+    ld1 {v0.16b}, [x3]; \
+    eor v2.16b, v2.16b, v5.16b; \
+    eor v3.16b, v3.16b, v6.16b; \
+    eor v4.16b, v4.16b, v7.16b; \
+    st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lxts_dec_loop4_##bits; \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x4, .Lxts_dec_done; \
+    \
+  .Lxts_dec_loop_##bits: \
+    \
+    ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+    ext v3.16b, v0.16b, v0.16b, #8; \
+    mov v2.16b, v0.16b; \
+    sshr v3.2d, v3.2d, #63; \
+    add v0.2d, v0.2d, v0.2d; \
+    and v3.16b, v3.16b, v16.16b; \
+    eor v1.16b, v1.16b, v2.16b; \
+    eor v0.16b, v0.16b, v3.16b; \
+    sub x4, x4, #1; \
+    \
+    do_aes_one##bits(d, imc, v1, v1); \
+    \
+    eor v1.16b, v1.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+    \
+    cbnz x4, .Lxts_dec_loop_##bits; \
+    b .Lxts_dec_done;
+
+  XTS_DEC(128)
+  XTS_DEC(192)
+  XTS_DEC(256)
+
+#undef XTS_DEC
+
+.Lxts_dec_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store tweak */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+
+.Lxts_dec_skip:
+  ret
+
+.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;
+
+
+/*
  * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
  */
 .align 3
diff -up libgcrypt-1.8.5/cipher/rijndael-armv8-ce.c.aes-perf libgcrypt-1.8.5/cipher/rijndael-armv8-ce.c
--- libgcrypt-1.8.5/cipher/rijndael-armv8-ce.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-armv8-ce.c	2020-04-22 18:29:41.675862134 +0200
@@ -101,6 +101,16 @@ extern void _gcry_aes_ocb_auth_armv8_ce
                                          size_t nblocks,
                                          unsigned int nrounds,
                                          unsigned int blkn);
+extern void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *tweak,
+                                        size_t nblocks, unsigned int nrounds);
+extern void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *tweak,
+                                        size_t nblocks, unsigned int nrounds);
 
 typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
                                 const unsigned char *inbuf,
@@ -108,6 +118,11 @@ typedef void (*ocb_crypt_fn_t) (const vo
                                 unsigned char *L_table, size_t nblocks,
                                 unsigned int nrounds, unsigned int blkn);
 
+typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
+                                const unsigned char *inbuf,
+                                unsigned char *tweak, size_t nblocks,
+                                unsigned int nrounds);
+
 void
 _gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
@@ -269,8 +284,8 @@ _gcry_aes_armv8_ce_decrypt (const RIJNDA
 }
 
 void
-_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks, int cbc_mac)
 {
   const void *keysched = ctx->keyschenc32;
@@ -281,19 +296,25 @@ _gcry_aes_armv8_ce_cbc_enc (const RIJNDA
 }
 
 void
-_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschdec32;
   unsigned int nrounds = ctx->rounds;
 
+  if ( !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
 }
 
 void
-_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -303,8 +324,8 @@ _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_con
 }
 
 void
-_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -314,8 +335,8 @@ _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_con
 }
 
 void
-_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -324,7 +345,7 @@ _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_con
   _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
 }
 
-void
+size_t
 _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                               const void *inbuf_arg, size_t nblocks,
                               int encrypt)
@@ -338,13 +359,21 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_ciphe
   unsigned int nrounds = ctx->rounds;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   c->u_mode.ocb.data_nblocks = blkn + nblocks;
 
   crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
            c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
 }
 
-void
+size_t
 _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
                              size_t nblocks)
 {
@@ -359,6 +388,27 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher
   _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
 			      c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0],
 			      nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
+}
+
+void
+_gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+			      unsigned char *outbuf, const unsigned char *inbuf,
+			      size_t nblocks, int encrypt)
+{
+  const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+  xts_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_xts_enc_armv8_ce
+                                    : _gcry_aes_xts_dec_armv8_ce;
+  unsigned int nrounds = ctx->rounds;
+
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  crypt_fn(keysched, outbuf, inbuf, tweak, nblocks, nrounds);
 }
 
 #endif /* USE_ARM_CE */
diff -up libgcrypt-1.8.5/cipher/rijndael.c.aes-perf libgcrypt-1.8.5/cipher/rijndael.c
--- libgcrypt-1.8.5/cipher/rijndael.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael.c	2020-04-22 18:29:41.676862114 +0200
@@ -77,32 +77,29 @@ extern unsigned int _gcry_aes_aesni_encr
 extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx,
                                              unsigned char *dst,
                                              const unsigned char *src);
-extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks,
-                                     int cbc_mac);
-extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                       const void *inbuf_arg, size_t nblocks,
-                                       int encrypt);
-extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
-                                      size_t nblocks);
+extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks, int cbc_mac);
+extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak,
+                                       void *outbuf_arg, const void *inbuf_arg,
+                                       size_t nblocks, int encrypt);
 #endif
 
 #ifdef USE_SSSE3
@@ -116,32 +113,27 @@ extern unsigned int _gcry_aes_ssse3_encr
 extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx,
                                              unsigned char *dst,
                                              const unsigned char *src);
-extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks,
+extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks,
                                      int cbc_mac);
-extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                       const void *inbuf_arg, size_t nblocks,
-                                       int encrypt);
-extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
-                                      size_t nblocks);
+extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                        size_t nblocks);
 #endif
 
 #ifdef USE_PADLOCK
@@ -180,34 +172,110 @@ extern unsigned int _gcry_aes_armv8_ce_d
                                                unsigned char *dst,
                                                const unsigned char *src);
 
-extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks,
+extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks,
                                         int cbc_mac);
-extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                          const void *inbuf_arg, size_t nblocks,
-                                          int encrypt);
-extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
-                                         const void *abuf_arg, size_t nblocks);
+extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                            const void *inbuf_arg, size_t nblocks,
+                                            int encrypt);
+extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
+                                           const void *abuf_arg, size_t nblocks);
+extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks, int encrypt);
 #endif /*USE_ARM_ASM*/
 
+#ifdef USE_PPC_CRYPTO
+/* PowerPC Crypto implementations of AES */
+extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
+					   unsigned char *dst,
+					   const unsigned char *src);
+extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
+					   unsigned char *dst,
+					   const unsigned char *src);
+
+extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+
+extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+					const void *inbuf_arg, size_t nblocks,
+					int encrypt);
+extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c,
+				       const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
+				      void *outbuf_arg,
+				      const void *inbuf_arg,
+				      size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO*/
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+/* Power9 little-endian crypto implementations of AES */
+extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx,
+					    unsigned char *dst,
+					    const unsigned char *src);
+extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx,
+					    unsigned char *dst,
+					    const unsigned char *src);
+
+extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv,
+				      void *outbuf_arg, const void *inbuf_arg,
+				      size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv,
+				      void *outbuf_arg, const void *inbuf_arg,
+				      size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr,
+				      void *outbuf_arg, const void *inbuf_arg,
+				      size_t nblocks);
+extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv,
+				      void *outbuf_arg, const void *inbuf_arg,
+				      size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv,
+				      void *outbuf_arg, const void *inbuf_arg,
+				      size_t nblocks);
+
+extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+					  const void *inbuf_arg, size_t nblocks,
+					  int encrypt);
+extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c,
+					const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak,
+					void *outbuf_arg,
+					const void *inbuf_arg,
+					size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+
 static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
                                 const unsigned char *ax);
 static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -260,7 +328,8 @@ static void prefetch_dec(void)
 
 /* Perform the key setup.  */
 static gcry_err_code_t
-do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
+do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
+           gcry_cipher_hd_t hd)
 {
   static int initialized = 0;
   static const char *selftest_failed = 0;
@@ -268,7 +337,7 @@ do_setkey (RIJNDAEL_context *ctx, const
   int i,j, r, t, rconpointer = 0;
   int KC;
 #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
-    || defined(USE_ARM_CE)
+    || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO)
   unsigned int hwfeatures;
 #endif
 
@@ -310,7 +379,7 @@ do_setkey (RIJNDAEL_context *ctx, const
   ctx->rounds = rounds;
 
 #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
-    || defined(USE_ARM_CE)
+    || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO)
   hwfeatures = _gcry_get_hw_features ();
 #endif
 
@@ -327,6 +396,12 @@ do_setkey (RIJNDAEL_context *ctx, const
 #ifdef USE_ARM_CE
   ctx->use_arm_ce = 0;
 #endif
+#ifdef USE_PPC_CRYPTO
+  ctx->use_ppc_crypto = 0;
+#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  ctx->use_ppc9le_crypto = 0;
+#endif
 
   if (0)
     {
@@ -340,6 +415,17 @@ do_setkey (RIJNDAEL_context *ctx, const
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_aesni = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_aesni_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_aesni_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_aesni_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_aesni_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_aesni_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_aesni_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_aesni_xts_crypt;
+        }
     }
 #endif
 #ifdef USE_PADLOCK
@@ -361,6 +447,16 @@ do_setkey (RIJNDAEL_context *ctx, const
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_ssse3 = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_ssse3_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_ssse3_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_ssse3_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_ssse3_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_ssse3_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_ssse3_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_ssse3_ocb_auth;
+        }
     }
 #endif
 #ifdef USE_ARM_CE
@@ -371,6 +467,60 @@ do_setkey (RIJNDAEL_context *ctx, const
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_arm_ce = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_armv8_ce_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_armv8_ce_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_armv8_ce_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_armv8_ce_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_armv8_ce_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_armv8_ce_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
+        }
+    }
+#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00))
+    {
+      ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->use_ppc_crypto = 1; /* same key-setup as USE_PPC_CRYPTO */
+      ctx->use_ppc9le_crypto = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_ppc9le_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_ppc9le_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_ppc9le_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_ppc9le_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_ppc9le_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_ppc9le_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_ppc9le_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_ppc9le_xts_crypt;
+        }
+    }
+#endif
+#ifdef USE_PPC_CRYPTO
+  else if (hwfeatures & HWF_PPC_VCRYPTO)
+    {
+      ctx->encrypt_fn = _gcry_aes_ppc8_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ppc8_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->use_ppc_crypto = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_ppc8_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_ppc8_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_ppc8_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt;
+        }
     }
 #endif
   else
@@ -399,6 +549,10 @@ do_setkey (RIJNDAEL_context *ctx, const
   else if (ctx->use_arm_ce)
     _gcry_aes_armv8_ce_setkey (ctx, key);
 #endif
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    _gcry_aes_ppc8_setkey (ctx, key);
+#endif
   else
     {
       const byte *sbox = ((const byte *)encT) + 1;
@@ -503,10 +657,11 @@ do_setkey (RIJNDAEL_context *ctx, const
 
 
 static gcry_err_code_t
-rijndael_setkey (void *context, const byte *key, const unsigned keylen)
+rijndael_setkey (void *context, const byte *key, const unsigned keylen,
+                 gcry_cipher_hd_t hd)
 {
   RIJNDAEL_context *ctx = context;
-  return do_setkey (ctx, key, keylen);
+  return do_setkey (ctx, key, keylen, hd);
 }
 
 
@@ -535,7 +690,19 @@ prepare_decryption( RIJNDAEL_context *ct
     {
       _gcry_aes_armv8_ce_prepare_decryption (ctx);
     }
-#endif /*USE_SSSE3*/
+#endif /*USE_ARM_CE*/
+#ifdef USE_ARM_CE
+  else if (ctx->use_arm_ce)
+    {
+      _gcry_aes_armv8_ce_prepare_decryption (ctx);
+    }
+#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_prepare_decryption (ctx);
+    }
+#endif
 #ifdef USE_PADLOCK
   else if (ctx->use_padlock)
     {
@@ -790,42 +957,56 @@ _gcry_aes_cfb_enc (void *context, unsign
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* Encrypt the IV. */
           burn_depth = encrypt_fn (ctx, iv, iv);
           /* XOR the input with the IV and store input into IV.  */
-          buf_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
         }
@@ -851,41 +1032,55 @@ _gcry_aes_cbc_enc (void *context, unsign
   unsigned char *last_iv;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_aesni_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       last_iv = iv;
 
       for ( ;nblocks; nblocks-- )
         {
-          buf_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
+          cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
 
           burn_depth = encrypt_fn (ctx, outbuf, outbuf);
 
@@ -896,7 +1091,7 @@ _gcry_aes_cbc_enc (void *context, unsign
         }
 
       if (last_iv != iv)
-        buf_cpy (iv, last_iv, BLOCKSIZE);
+        cipher_block_cpy (iv, last_iv, BLOCKSIZE);
     }
 
   if (burn_depth)
@@ -920,43 +1115,57 @@ _gcry_aes_ctr_enc (void *context, unsign
   unsigned int burn_depth = 0;
   int i;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* Encrypt the counter. */
           burn_depth = encrypt_fn (ctx, tmp.x1, ctr);
           /* XOR the input with the encrypted counter and store in output.  */
-          buf_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
           /* Increment the counter.  */
@@ -1187,40 +1396,54 @@ _gcry_aes_cfb_dec (void *context, unsign
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           burn_depth = encrypt_fn (ctx, iv, iv);
-          buf_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
           outbuf += BLOCKSIZE;
           inbuf  += BLOCKSIZE;
         }
@@ -1245,39 +1468,53 @@ _gcry_aes_cbc_dec (void *context, unsign
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  check_decryption_preparation (ctx);
-
-  if (ctx->prefetch_dec_fn)
-    ctx->prefetch_dec_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
       rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
 
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* INBUF is needed later and it may be identical to OUTBUF, so store
@@ -1285,7 +1522,7 @@ _gcry_aes_cbc_dec (void *context, unsign
 
           burn_depth = decrypt_fn (ctx, savebuf, inbuf);
 
-          buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
+          cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
         }
@@ -1309,62 +1546,61 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (encrypt)
-    {
-      if (ctx->prefetch_enc_fn)
-        ctx->prefetch_enc_fn();
-    }
-  else
-    {
-      check_decryption_preparation (ctx);
-
-      if (ctx->prefetch_dec_fn)
-        ctx->prefetch_dec_fn();
-    }
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      return _gcry_aes_ppc9le_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
+    }
+#endif /*USE_PPC_CRYPTO*/
   else if (encrypt)
     {
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
-          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
           /* Checksum_i = Checksum_{i-1} xor P_i  */
-          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
-          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
 
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
@@ -1375,21 +1611,26 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c,
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
 
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
-          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
           /* Checksum_i = Checksum_{i-1} xor P_i  */
-          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
-          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
 
           inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
@@ -1411,48 +1652,58 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c,
   const unsigned char *abuf = abuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      return _gcry_aes_ppc9le_ocb_auth (c, abuf, nblocks);
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      return _gcry_aes_ppc8_ocb_auth (c, abuf, nblocks);
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.aad_nblocks;
           const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-          buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE);
+          cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf,
+                            BLOCKSIZE);
           burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
-          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
 
           abuf += BLOCKSIZE;
         }
@@ -1467,6 +1718,106 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c,
 }
 
 
+/* Bulk encryption/decryption of complete blocks in XTS mode. */
+void
+_gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+		     void *outbuf_arg, const void *inbuf_arg,
+		     size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn_depth = 0;
+  rijndael_cryptfn_t crypt_fn;
+  u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;
+
+  if (0)
+    ;
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+      return;
+    }
+#endif /*USE_AESNI*/
+#ifdef USE_ARM_CE
+  else if (ctx->use_arm_ce)
+    {
+      _gcry_aes_armv8_ce_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+      return;
+    }
+#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if (ctx->use_ppc9le_crypto)
+    {
+      _gcry_aes_ppc9le_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
+  else
+    {
+      if (encrypt)
+        {
+          if (ctx->prefetch_enc_fn)
+            ctx->prefetch_enc_fn();
+
+          crypt_fn = ctx->encrypt_fn;
+        }
+      else
+        {
+          check_decryption_preparation (ctx);
+
+          if (ctx->prefetch_dec_fn)
+            ctx->prefetch_dec_fn();
+
+          crypt_fn = ctx->decrypt_fn;
+        }
+
+      tweak_next_lo = buf_get_le64 (tweak + 0);
+      tweak_next_hi = buf_get_le64 (tweak + 8);
+
+      while (nblocks)
+	{
+	  tweak_lo = tweak_next_lo;
+	  tweak_hi = tweak_next_hi;
+
+	  /* Xor-Encrypt/Decrypt-Xor block. */
+	  tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo;
+	  tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi;
+
+	  buf_put_le64 (outbuf + 0, tmp_lo);
+	  buf_put_le64 (outbuf + 8, tmp_hi);
+
+	  /* Generate next tweak. */
+	  carry = -(tweak_next_hi >> 63) & 0x87;
+	  tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63);
+	  tweak_next_lo = (tweak_next_lo << 1) ^ carry;
+
+	  burn_depth = crypt_fn (ctx, outbuf, outbuf);
+
+	  buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo);
+	  buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi);
+
+	  outbuf += GCRY_XTS_BLOCK_LEN;
+	  inbuf += GCRY_XTS_BLOCK_LEN;
+	  nblocks--;
+	}
+
+      buf_put_le64 (tweak + 0, tweak_next_lo);
+      buf_put_le64 (tweak + 8, tweak_next_hi);
+    }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 5 * sizeof(void *));
+}
+
 
 /* Run the self-tests for AES 128.  Returns NULL on success. */
 static const char*
@@ -1522,7 +1873,7 @@ selftest_basic_128 (void)
   if (!ctx)
     return "failed to allocate memory";
 
-  rijndael_setkey (ctx, key_128, sizeof (key_128));
+  rijndael_setkey (ctx, key_128, sizeof (key_128), NULL);
   rijndael_encrypt (ctx, scratch, plaintext_128);
   if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
     {
@@ -1565,7 +1916,7 @@ selftest_basic_192 (void)
   ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
   if (!ctx)
     return "failed to allocate memory";
-  rijndael_setkey (ctx, key_192, sizeof(key_192));
+  rijndael_setkey (ctx, key_192, sizeof(key_192), NULL);
   rijndael_encrypt (ctx, scratch, plaintext_192);
   if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
     {
@@ -1610,7 +1961,7 @@ selftest_basic_256 (void)
   ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
   if (!ctx)
     return "failed to allocate memory";
-  rijndael_setkey (ctx, key_256, sizeof(key_256));
+  rijndael_setkey (ctx, key_256, sizeof(key_256), NULL);
   rijndael_encrypt (ctx, scratch, plaintext_256);
   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
     {
diff -up libgcrypt-1.8.5/cipher/rijndael-internal.h.aes-perf libgcrypt-1.8.5/cipher/rijndael-internal.h
--- libgcrypt-1.8.5/cipher/rijndael-internal.h.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-internal.h	2020-04-22 18:29:41.676862114 +0200
@@ -73,7 +73,7 @@
 #   define USE_PADLOCK 1
 #  endif
 # endif
-#endif /*ENABLE_PADLOCK_SUPPORT*/
+#endif /* ENABLE_PADLOCK_SUPPORT */
 
 /* USE_AESNI inidicates whether to compile with Intel AES-NI code.  We
    need the vector-size attribute which seems to be available since
@@ -102,6 +102,23 @@
 # endif
 #endif /* ENABLE_ARM_CRYPTO_SUPPORT */
 
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code.  USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to
+ * enable POWER9 optimized variant.  */
+#undef USE_PPC_CRYPTO
+#undef USE_PPC_CRYPTO_WITH_PPC9LE
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_CRYPTO 1
+#   if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00)
+#    define USE_PPC_CRYPTO_WITH_PPC9LE 1
+#   endif
+#  endif
+# endif
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
+
 struct RIJNDAEL_context_s;
 
 typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx,
@@ -150,6 +167,12 @@ typedef struct RIJNDAEL_context_s
 #ifdef USE_ARM_CE
   unsigned int use_arm_ce:1;          /* ARMv8 CE shall be used.  */
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  unsigned int use_ppc_crypto:1;      /* PowerPC crypto shall be used.  */
+#endif /*USE_PPC_CRYPTO*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  unsigned int use_ppc9le_crypto:1;   /* POWER9 LE crypto shall be used.  */
+#endif
   rijndael_cryptfn_t encrypt_fn;
   rijndael_cryptfn_t decrypt_fn;
   rijndael_prefetchfn_t prefetch_enc_fn;
diff -up libgcrypt-1.8.5/cipher/rijndael-ppc9le.c.aes-perf libgcrypt-1.8.5/cipher/rijndael-ppc9le.c
--- libgcrypt-1.8.5/cipher/rijndael-ppc9le.c.aes-perf	2020-04-22 18:29:41.677862096 +0200
+++ libgcrypt-1.8.5/cipher/rijndael-ppc9le.c	2020-04-22 18:29:41.677862096 +0200
@@ -0,0 +1,102 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+
+#include "rijndael-ppc-common.h"
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+  static const block vec_dummy = { 0 };
+  return vec_dummy;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+  (void)be_bswap_const;
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvb16x %x0,0,%1\n\t"
+		      : "=wa" (vec)
+		      : "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvb16x %x0,%1,%2\n\t"
+		      : "=wa" (vec)
+		      : "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvb16x %x0,0,%1\n\t"
+		      :
+		      : "wa" (vec), "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("stxvb16x %x0,%1,%2\n\t"
+		      :
+		      : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+}
+
+
+#define GCRY_AES_PPC9LE 1
+#define ENCRYPT_BLOCK_FUNC	_gcry_aes_ppc9le_encrypt
+#define DECRYPT_BLOCK_FUNC	_gcry_aes_ppc9le_decrypt
+#define CFB_ENC_FUNC		_gcry_aes_ppc9le_cfb_enc
+#define CFB_DEC_FUNC		_gcry_aes_ppc9le_cfb_dec
+#define CBC_ENC_FUNC		_gcry_aes_ppc9le_cbc_enc
+#define CBC_DEC_FUNC		_gcry_aes_ppc9le_cbc_dec
+#define CTR_ENC_FUNC		_gcry_aes_ppc9le_ctr_enc
+#define OCB_CRYPT_FUNC		_gcry_aes_ppc9le_ocb_crypt
+#define OCB_AUTH_FUNC		_gcry_aes_ppc9le_ocb_auth
+#define XTS_CRYPT_FUNC		_gcry_aes_ppc9le_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO */
diff -up libgcrypt-1.8.5/cipher/rijndael-ppc.c.aes-perf libgcrypt-1.8.5/cipher/rijndael-ppc.c
--- libgcrypt-1.8.5/cipher/rijndael-ppc.c.aes-perf	2020-04-22 18:29:41.677862096 +0200
+++ libgcrypt-1.8.5/cipher/rijndael-ppc.c	2020-04-22 18:29:41.677862096 +0200
@@ -0,0 +1,259 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO
+
+#include "rijndael-ppc-common.h"
+
+
+#ifdef WORDS_BIGENDIAN
+static const block vec_bswap32_const =
+  { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#else
+static const block vec_bswap32_const_neg =
+  { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+#ifndef WORDS_BIGENDIAN
+  return ALIGNED_LOAD (&vec_bswap32_const_neg, 0);
+#else
+  static const block vec_dummy = { 0 };
+  return vec_dummy;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+  (void)be_bswap_const;
+#ifndef WORDS_BIGENDIAN
+  return asm_vperm1 (vec, be_bswap_const);
+#else
+  return vec;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvw4x %x0,0,%1\n\t"
+		      : "=wa" (vec)
+		      : "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvw4x %x0,%1,%2\n\t"
+		      : "=wa" (vec)
+		      : "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+  /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+  /* NOTE: vec be-swapped using 'asm_be_swap' by caller */
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvw4x %x0,0,%1\n\t"
+		      :
+		      : "wa" (vec), "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("stxvw4x %x0,%1,%2\n\t"
+		      :
+		      : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+_gcry_aes_sbox4_ppc8(u32 fourbytes)
+{
+  union
+    {
+      PROPERLY_ALIGNED_TYPE dummy;
+      block data_vec;
+      u32 data32[4];
+    } u;
+
+  u.data32[0] = fourbytes;
+  u.data_vec = vec_sbox_be(u.data_vec);
+  return u.data32[0];
+}
+
+void
+_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+  const block bige_const = asm_load_be_const();
+  union
+    {
+      PROPERLY_ALIGNED_TYPE dummy;
+      byte data[MAXKC][4];
+      u32 data32[MAXKC];
+    } tkk[2];
+  unsigned int rounds = ctx->rounds;
+  int KC = rounds - 6;
+  unsigned int keylen = KC * 4;
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  unsigned int i, r, t;
+  byte rcon = 1;
+  int j;
+#define k      tkk[0].data
+#define k_u32  tkk[0].data32
+#define tk     tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W      (ctx->keyschenc)
+#define W_u32  (ctx->keyschenc32)
+
+  for (i = 0; i < keylen; i++)
+    {
+      k[i >> 2][i & 3] = key[i];
+    }
+
+  for (j = KC-1; j >= 0; j--)
+    {
+      tk_u32[j] = k_u32[j];
+    }
+  r = 0;
+  t = 0;
+  /* Copy values into round key array.  */
+  for (j = 0; (j < KC) && (r < rounds + 1); )
+    {
+      for (; (j < KC) && (t < 4); j++, t++)
+        {
+          W_u32[r][t] = le_bswap32(tk_u32[j]);
+        }
+      if (t == 4)
+        {
+          r++;
+          t = 0;
+        }
+    }
+  while (r < rounds + 1)
+    {
+      tk_u32[0] ^=
+	le_bswap32(
+	  _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon);
+
+      if (KC != 8)
+        {
+          for (j = 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+      else
+        {
+          for (j = 1; j < KC/2; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+
+          tk_u32[KC/2] ^=
+	    le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1])));
+
+          for (j = KC/2 + 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+
+      /* Copy values into round key array.  */
+      for (j = 0; (j < KC) && (r < rounds + 1); )
+        {
+          for (; (j < KC) && (t < 4); j++, t++)
+            {
+              W_u32[r][t] = le_bswap32(tk_u32[j]);
+            }
+          if (t == 4)
+            {
+              r++;
+              t = 0;
+            }
+        }
+
+      rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b);
+    }
+
+  /* Store in big-endian order. */
+  for (r = 0; r <= rounds; r++)
+    {
+#ifndef WORDS_BIGENDIAN
+      VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const);
+#else
+      block rvec = ALIGNED_LOAD (ekey, r);
+      ALIGNED_STORE (ekey, r,
+                     vec_perm(rvec, rvec, vec_bswap32_const));
+      (void)bige_const;
+#endif
+    }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+  wipememory(&tkk, sizeof(tkk));
+}
+
+void
+_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  internal_aes_ppc_prepare_decryption (ctx);
+}
+
+
+#define GCRY_AES_PPC8 1
+#define ENCRYPT_BLOCK_FUNC	_gcry_aes_ppc8_encrypt
+#define DECRYPT_BLOCK_FUNC	_gcry_aes_ppc8_decrypt
+#define CFB_ENC_FUNC		_gcry_aes_ppc8_cfb_enc
+#define CFB_DEC_FUNC		_gcry_aes_ppc8_cfb_dec
+#define CBC_ENC_FUNC		_gcry_aes_ppc8_cbc_enc
+#define CBC_DEC_FUNC		_gcry_aes_ppc8_cbc_dec
+#define CTR_ENC_FUNC		_gcry_aes_ppc8_ctr_enc
+#define OCB_CRYPT_FUNC		_gcry_aes_ppc8_ocb_crypt
+#define OCB_AUTH_FUNC		_gcry_aes_ppc8_ocb_auth
+#define XTS_CRYPT_FUNC		_gcry_aes_ppc8_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO */
diff -up libgcrypt-1.8.5/cipher/rijndael-ppc-common.h.aes-perf libgcrypt-1.8.5/cipher/rijndael-ppc-common.h
--- libgcrypt-1.8.5/cipher/rijndael-ppc-common.h.aes-perf	2020-04-22 18:29:41.678862076 +0200
+++ libgcrypt-1.8.5/cipher/rijndael-ppc-common.h	2020-04-22 18:29:41.678862076 +0200
@@ -0,0 +1,342 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#ifndef G10_RIJNDAEL_PPC_COMMON_H
+#define G10_RIJNDAEL_PPC_COMMON_H
+
+#include <altivec.h>
+
+
+typedef vector unsigned char block;
+
+typedef union
+{
+  u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#define ALIGNED_LOAD(in_ptr, offs) \
+  (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, offs, vec) \
+  (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
+
+#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
+
+#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
+  (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
+		bige_const))
+
+#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
+  (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
+
+#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
+  (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
+		        (void *)(out_ptr)))
+
+#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
+  (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
+
+
+#define ROUND_KEY_VARIABLES \
+  block rkey0, rkeylast
+
+#define PRELOAD_ROUND_KEYS(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if (nrounds >= 12) \
+      { \
+	blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+	blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+	if (rounds > 12) \
+	  { \
+	    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+	    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+	  } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+#define AES_DECRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if (nrounds >= 12) \
+      { \
+	blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+	blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+	if (rounds > 12) \
+	  { \
+	    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+	    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+	  } \
+      } \
+    blk = asm_ncipherlast_be (blk, rkeylast); \
+  } while (0)
+
+
+#define ROUND_KEY_VARIABLES_ALL \
+  block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
+        rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
+
+#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkey1 = ALIGNED_LOAD (rk, 1); \
+    rkey2 = ALIGNED_LOAD (rk, 2); \
+    rkey3 = ALIGNED_LOAD (rk, 3); \
+    rkey4 = ALIGNED_LOAD (rk, 4); \
+    rkey5 = ALIGNED_LOAD (rk, 5); \
+    rkey6 = ALIGNED_LOAD (rk, 6); \
+    rkey7 = ALIGNED_LOAD (rk, 7); \
+    rkey8 = ALIGNED_LOAD (rk, 8); \
+    rkey9 = ALIGNED_LOAD (rk, 9); \
+    if (nrounds >= 12) \
+      { \
+	rkey10 = ALIGNED_LOAD (rk, 10); \
+	rkey11 = ALIGNED_LOAD (rk, 11); \
+	if (rounds > 12) \
+	  { \
+	    rkey12 = ALIGNED_LOAD (rk, 12); \
+	    rkey13 = ALIGNED_LOAD (rk, 13); \
+	  } \
+      } \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT_ALL(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, rkey1); \
+    blk = asm_cipher_be (blk, rkey2); \
+    blk = asm_cipher_be (blk, rkey3); \
+    blk = asm_cipher_be (blk, rkey4); \
+    blk = asm_cipher_be (blk, rkey5); \
+    blk = asm_cipher_be (blk, rkey6); \
+    blk = asm_cipher_be (blk, rkey7); \
+    blk = asm_cipher_be (blk, rkey8); \
+    blk = asm_cipher_be (blk, rkey9); \
+    if (nrounds >= 12) \
+      { \
+	blk = asm_cipher_be (blk, rkey10); \
+	blk = asm_cipher_be (blk, rkey11); \
+	if (rounds > 12) \
+	  { \
+	    blk = asm_cipher_be (blk, rkey12); \
+	    blk = asm_cipher_be (blk, rkey13); \
+	  } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_aligned_ld(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lvx %0,0,%1\n\t"
+		      : "=v" (vec)
+		      : "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("lvx %0,%1,%2\n\t"
+		      : "=v" (vec)
+		      : "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_aligned_st(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stvx %0,0,%1\n\t"
+		      :
+		      : "v" (vec), "r" ((uintptr_t)ptr)
+		      : "memory");
+  else
+#endif
+    __asm__ volatile ("stvx %0,%1,%2\n\t"
+		      :
+		      : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+		      : "memory", "r0");
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_vperm1(block vec, block mask)
+{
+  block o;
+  __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
+		    : "=v" (o)
+		    : "v" (vec), "v" (mask));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint128(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vadduqm %0,%1,%2\n\t"
+		    : "=v" (res)
+		    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vaddudm %0,%1,%2\n\t"
+		    : "=v" (res)
+		    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_sra_int64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vsrad %0,%1,%2\n\t"
+		    : "=v" (res)
+		    : "v" (a), "v" (b));
+  return res;
+}
+
+static block
+asm_swap_uint64_halfs(block a)
+{
+  block res;
+  __asm__ volatile ("xxswapd %x0, %x1"
+		    : "=wa" (res)
+		    : "wa" (a));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_xor(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vxor %0,%1,%2\n\t"
+		    : "=v" (res)
+		    : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipher %0, %1, %2\n\t"
+		    : "=v" (o)
+		    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
+		    : "=v" (o)
+		    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipher %0, %1, %2\n\t"
+		    : "=v" (o)
+		    : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
+		    : "=v" (o)
+		    : "v" (b), "v" (rk));
+  return o;
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+  int rounds = ctx->rounds;
+  int rr;
+  int r;
+
+  r = 0;
+  rr = rounds;
+  for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+    {
+      ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
+    }
+}
+
+#endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff -up libgcrypt-1.8.5/cipher/rijndael-ppc-functions.h.aes-perf libgcrypt-1.8.5/cipher/rijndael-ppc-functions.h
--- libgcrypt-1.8.5/cipher/rijndael-ppc-functions.h.aes-perf	2020-04-22 18:29:41.679862057 +0200
+++ libgcrypt-1.8.5/cipher/rijndael-ppc-functions.h	2020-04-22 18:29:41.679862057 +0200
@@ -0,0 +1,2020 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+				 unsigned char *out,
+				 const unsigned char *in)
+{
+  const block bige_const = asm_load_be_const();
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block b;
+
+  b = VEC_LOAD_BE (in, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  AES_ENCRYPT (b, rounds);
+  VEC_STORE_BE (out, 0, b, bige_const);
+
+  return 0; /* does not use stack */
+}
+
+
+unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+				 unsigned char *out,
+				 const unsigned char *in)
+{
+  const block bige_const = asm_load_be_const();
+  const u128_t *rk = (u128_t *)&ctx->keyschdec;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block b;
+
+  b = VEC_LOAD_BE (in, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  AES_DECRYPT (b, rounds);
+  VEC_STORE_BE (out, 0, b, bige_const);
+
+  return 0; /* does not use stack */
+}
+
+
+void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
+		   void *outbuf_arg, const void *inbuf_arg,
+		   size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES_ALL;
+  block rkeylast_orig;
+  block iv;
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS_ALL (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 2; nblocks -= 2)
+    {
+      block in2, iv1;
+
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+      in += 2;
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      iv1 = iv;
+      rkeylast = rkeylast_orig ^ in2;
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      VEC_STORE_BE (out++, 0, iv1, bige_const);
+      VEC_STORE_BE (out++, 0, iv, bige_const);
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      VEC_STORE_BE (out++, 0, iv, bige_const);
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
+		   void *outbuf_arg, const void *inbuf_arg,
+		   size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block iv, b, bin;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE_NOSWAP (in, 0);
+      in2 = VEC_LOAD_BE_NOSWAP (in, 1);
+      in3 = VEC_LOAD_BE_NOSWAP (in, 2);
+      in4 = VEC_LOAD_BE_NOSWAP (in, 3);
+      in1 = VEC_BE_SWAP (in1, bige_const);
+      in2 = VEC_BE_SWAP (in2, bige_const);
+      in5 = VEC_LOAD_BE_NOSWAP (in, 4);
+      in6 = VEC_LOAD_BE_NOSWAP (in, 5);
+      in3 = VEC_BE_SWAP (in3, bige_const);
+      in4 = VEC_BE_SWAP (in4, bige_const);
+      in7 = VEC_LOAD_BE_NOSWAP (in, 6);
+      iv = VEC_LOAD_BE_NOSWAP (in, 7);
+      in += 8;
+      in5 = VEC_BE_SWAP (in5, bige_const);
+      in6 = VEC_BE_SWAP (in6, bige_const);
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      in7 = VEC_BE_SWAP (in7, bige_const);
+      iv = VEC_BE_SWAP (iv, bige_const);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+      b4 = asm_xor (rkey0, in4);
+      b5 = asm_xor (rkey0, in5);
+      b6 = asm_xor (rkey0, in6);
+      b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey); \
+	      b4 = asm_cipher_be (b4, rkey); \
+	      b5 = asm_cipher_be (b5, rkey); \
+	      b6 = asm_cipher_be (b6, rkey); \
+	      b7 = asm_cipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      in3 = asm_xor (rkeylast, in3);
+      in4 = asm_xor (rkeylast, in4);
+      b0 = asm_cipherlast_be (b0, in1);
+      b1 = asm_cipherlast_be (b1, in2);
+      in5 = asm_xor (rkeylast, in5);
+      in6 = asm_xor (rkeylast, in6);
+      b2 = asm_cipherlast_be (b2, in3);
+      b3 = asm_cipherlast_be (b3, in4);
+      in7 = asm_xor (rkeylast, in7);
+      in0 = asm_xor (rkeylast, iv);
+      b0 = VEC_BE_SWAP (b0, bige_const);
+      b1 = VEC_BE_SWAP (b1, bige_const);
+      b4 = asm_cipherlast_be (b4, in5);
+      b5 = asm_cipherlast_be (b5, in6);
+      b2 = VEC_BE_SWAP (b2, bige_const);
+      b3 = VEC_BE_SWAP (b3, bige_const);
+      b6 = asm_cipherlast_be (b6, in7);
+      b7 = asm_cipherlast_be (b7, in0);
+      b4 = VEC_BE_SWAP (b4, bige_const);
+      b5 = VEC_BE_SWAP (b5, bige_const);
+      b6 = VEC_BE_SWAP (b6, bige_const);
+      b7 = VEC_BE_SWAP (b7, bige_const);
+      VEC_STORE_BE_NOSWAP (out, 0, b0);
+      VEC_STORE_BE_NOSWAP (out, 1, b1);
+      VEC_STORE_BE_NOSWAP (out, 2, b2);
+      VEC_STORE_BE_NOSWAP (out, 3, b3);
+      VEC_STORE_BE_NOSWAP (out, 4, b4);
+      VEC_STORE_BE_NOSWAP (out, 5, b5);
+      VEC_STORE_BE_NOSWAP (out, 6, b6);
+      VEC_STORE_BE_NOSWAP (out, 7, b7);
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in, 1, bige_const);
+      in3 = VEC_LOAD_BE (in, 2, bige_const);
+      iv = VEC_LOAD_BE (in, 3, bige_const);
+
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      in3 = asm_xor (rkeylast, in3);
+      in0 = asm_xor (rkeylast, iv);
+      b0 = asm_cipherlast_be (b0, in1);
+      b1 = asm_cipherlast_be (b1, in2);
+      b2 = asm_cipherlast_be (b2, in3);
+      b3 = asm_cipherlast_be (b3, in0);
+      VEC_STORE_BE (out, 0, b0, bige_const);
+      VEC_STORE_BE (out, 1, b1, bige_const);
+      VEC_STORE_BE (out, 2, b2, bige_const);
+      VEC_STORE_BE (out, 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      bin = VEC_LOAD_BE (in, 0, bige_const);
+      rkeylast = rkeylast_orig ^ bin;
+      b = iv;
+      iv = bin;
+
+      AES_ENCRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
+		   void *outbuf_arg, const void *inbuf_arg,
+		   size_t nblocks, int cbc_mac)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  byte *out = (byte *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES_ALL;
+  block lastiv, b;
+  unsigned int outadd = -(!cbc_mac) & 16;
+
+  lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS_ALL (rounds);
+
+  for (; nblocks >= 2; nblocks -= 2)
+    {
+      block in2, lastiv1;
+
+      b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+      in += 2;
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv1 = b;
+      b = lastiv1 ^ in2;
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv = b;
+      VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const);
+      out += outadd;
+      VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const);
+      out += outadd;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv = b;
+      VEC_STORE_BE ((u128_t *)out, 0, b, bige_const);
+      out += outadd;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
+}
+
+void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
+		   void *outbuf_arg, const void *inbuf_arg,
+		   size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschdec;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+  block iv, b;
+
+  if (!ctx->decryption_prepared)
+    {
+      internal_aes_ppc_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+      in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+      in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+      in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+      in0 = VEC_BE_SWAP (in0, bige_const);
+      in1 = VEC_BE_SWAP (in1, bige_const);
+      in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+      in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+      in2 = VEC_BE_SWAP (in2, bige_const);
+      in3 = VEC_BE_SWAP (in3, bige_const);
+      in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+      in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+      in += 8;
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      in4 = VEC_BE_SWAP (in4, bige_const);
+      in5 = VEC_BE_SWAP (in5, bige_const);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+      in6 = VEC_BE_SWAP (in6, bige_const);
+      in7 = VEC_BE_SWAP (in7, bige_const);
+      b4 = asm_xor (rkey0, in4);
+      b5 = asm_xor (rkey0, in5);
+      b6 = asm_xor (rkey0, in6);
+      b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey); \
+	      b4 = asm_ncipher_be (b4, rkey); \
+	      b5 = asm_ncipher_be (b5, rkey); \
+	      b6 = asm_ncipher_be (b6, rkey); \
+	      b7 = asm_ncipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      iv = asm_xor (rkeylast, iv);
+      in0 = asm_xor (rkeylast, in0);
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      b0 = asm_ncipherlast_be (b0, iv);
+      iv = in7;
+      b1 = asm_ncipherlast_be (b1, in0);
+      in3 = asm_xor (rkeylast, in3);
+      in4 = asm_xor (rkeylast, in4);
+      b2 = asm_ncipherlast_be (b2, in1);
+      b3 = asm_ncipherlast_be (b3, in2);
+      in5 = asm_xor (rkeylast, in5);
+      in6 = asm_xor (rkeylast, in6);
+      b0 = VEC_BE_SWAP (b0, bige_const);
+      b1 = VEC_BE_SWAP (b1, bige_const);
+      b4 = asm_ncipherlast_be (b4, in3);
+      b5 = asm_ncipherlast_be (b5, in4);
+      b2 = VEC_BE_SWAP (b2, bige_const);
+      b3 = VEC_BE_SWAP (b3, bige_const);
+      b6 = asm_ncipherlast_be (b6, in5);
+      b7 = asm_ncipherlast_be (b7, in6);
+      b4 = VEC_BE_SWAP (b4, bige_const);
+      b5 = VEC_BE_SWAP (b5, bige_const);
+      b6 = VEC_BE_SWAP (b6, bige_const);
+      b7 = VEC_BE_SWAP (b7, bige_const);
+      VEC_STORE_BE_NOSWAP (out, 0, b0);
+      VEC_STORE_BE_NOSWAP (out, 1, b1);
+      VEC_STORE_BE_NOSWAP (out, 2, b2);
+      VEC_STORE_BE_NOSWAP (out, 3, b3);
+      VEC_STORE_BE_NOSWAP (out, 4, b4);
+      VEC_STORE_BE_NOSWAP (out, 5, b5);
+      VEC_STORE_BE_NOSWAP (out, 6, b6);
+      VEC_STORE_BE_NOSWAP (out, 7, b7);
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = VEC_LOAD_BE (in, 0, bige_const);
+      in1 = VEC_LOAD_BE (in, 1, bige_const);
+      in2 = VEC_LOAD_BE (in, 2, bige_const);
+      in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      iv = asm_xor (rkeylast, iv);
+      in0 = asm_xor (rkeylast, in0);
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+
+      b0 = asm_ncipherlast_be (b0, iv);
+      iv = in3;
+      b1 = asm_ncipherlast_be (b1, in0);
+      b2 = asm_ncipherlast_be (b2, in1);
+      b3 = asm_ncipherlast_be (b3, in2);
+
+      VEC_STORE_BE (out, 0, b0, bige_const);
+      VEC_STORE_BE (out, 1, b1, bige_const);
+      VEC_STORE_BE (out, 2, b2, bige_const);
+      VEC_STORE_BE (out, 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ iv;
+
+      iv = VEC_LOAD_BE (in, 0, bige_const);
+      b = iv;
+      AES_DECRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      in++;
+      out++;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
+		   void *outbuf_arg, const void *inbuf_arg,
+		   size_t nblocks)
+{
+  static const unsigned char vec_one_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block ctr, b, one;
+
+  ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
+  one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  if (nblocks >= 4)
+    {
+      block in0, in1, in2, in3, in4, in5, in6, in7;
+      block b0, b1, b2, b3, b4, b5, b6, b7;
+      block two, three, four;
+      block rkey;
+
+      two   = asm_add_uint128 (one, one);
+      three = asm_add_uint128 (two, one);
+      four  = asm_add_uint128 (two, two);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  b1 = asm_add_uint128 (ctr, one);
+	  b2 = asm_add_uint128 (ctr, two);
+	  b3 = asm_add_uint128 (ctr, three);
+	  b4 = asm_add_uint128 (ctr, four);
+	  b5 = asm_add_uint128 (b1, four);
+	  b6 = asm_add_uint128 (b2, four);
+	  b7 = asm_add_uint128 (b3, four);
+	  b0 = asm_xor (rkey0, ctr);
+	  rkey = ALIGNED_LOAD (rk, 1);
+	  ctr = asm_add_uint128 (b4, four);
+	  b1 = asm_xor (rkey0, b1);
+	  b2 = asm_xor (rkey0, b2);
+	  b3 = asm_xor (rkey0, b3);
+	  b0 = asm_cipher_be (b0, rkey);
+	  b1 = asm_cipher_be (b1, rkey);
+	  b2 = asm_cipher_be (b2, rkey);
+	  b3 = asm_cipher_be (b3, rkey);
+	  b4 = asm_xor (rkey0, b4);
+	  b5 = asm_xor (rkey0, b5);
+	  b6 = asm_xor (rkey0, b6);
+	  b7 = asm_xor (rkey0, b7);
+	  b4 = asm_cipher_be (b4, rkey);
+	  b5 = asm_cipher_be (b5, rkey);
+	  b6 = asm_cipher_be (b6, rkey);
+	  b7 = asm_cipher_be (b7, rkey);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey); \
+	      b4 = asm_cipher_be (b4, rkey); \
+	      b5 = asm_cipher_be (b5, rkey); \
+	      b6 = asm_cipher_be (b6, rkey); \
+	      b7 = asm_cipher_be (b7, rkey);
+
+	  in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+	  DO_ROUND(2);
+	  in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+	  DO_ROUND(3);
+	  in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+	  DO_ROUND(4);
+	  in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+	  DO_ROUND(5);
+	  in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+	  DO_ROUND(6);
+	  in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+	  DO_ROUND(7);
+	  in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+	  DO_ROUND(8);
+	  in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+	  in += 8;
+	  DO_ROUND(9);
+
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  in0 = VEC_BE_SWAP (in0, bige_const);
+	  in1 = VEC_BE_SWAP (in1, bige_const);
+	  in2 = VEC_BE_SWAP (in2, bige_const);
+	  in3 = VEC_BE_SWAP (in3, bige_const);
+	  in4 = VEC_BE_SWAP (in4, bige_const);
+	  in5 = VEC_BE_SWAP (in5, bige_const);
+	  in6 = VEC_BE_SWAP (in6, bige_const);
+	  in7 = VEC_BE_SWAP (in7, bige_const);
+
+	  in0 = asm_xor (rkeylast, in0);
+	  in1 = asm_xor (rkeylast, in1);
+	  in2 = asm_xor (rkeylast, in2);
+	  in3 = asm_xor (rkeylast, in3);
+	  b0 = asm_cipherlast_be (b0, in0);
+	  b1 = asm_cipherlast_be (b1, in1);
+	  in4 = asm_xor (rkeylast, in4);
+	  in5 = asm_xor (rkeylast, in5);
+	  b2 = asm_cipherlast_be (b2, in2);
+	  b3 = asm_cipherlast_be (b3, in3);
+	  in6 = asm_xor (rkeylast, in6);
+	  in7 = asm_xor (rkeylast, in7);
+	  b4 = asm_cipherlast_be (b4, in4);
+	  b5 = asm_cipherlast_be (b5, in5);
+	  b6 = asm_cipherlast_be (b6, in6);
+	  b7 = asm_cipherlast_be (b7, in7);
+
+	  b0 = VEC_BE_SWAP (b0, bige_const);
+	  b1 = VEC_BE_SWAP (b1, bige_const);
+	  b2 = VEC_BE_SWAP (b2, bige_const);
+	  b3 = VEC_BE_SWAP (b3, bige_const);
+	  b4 = VEC_BE_SWAP (b4, bige_const);
+	  b5 = VEC_BE_SWAP (b5, bige_const);
+	  b6 = VEC_BE_SWAP (b6, bige_const);
+	  b7 = VEC_BE_SWAP (b7, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 0, b0);
+	  VEC_STORE_BE_NOSWAP (out, 1, b1);
+	  VEC_STORE_BE_NOSWAP (out, 2, b2);
+	  VEC_STORE_BE_NOSWAP (out, 3, b3);
+	  VEC_STORE_BE_NOSWAP (out, 4, b4);
+	  VEC_STORE_BE_NOSWAP (out, 5, b5);
+	  VEC_STORE_BE_NOSWAP (out, 6, b6);
+	  VEC_STORE_BE_NOSWAP (out, 7, b7);
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  b1 = asm_add_uint128 (ctr, one);
+	  b2 = asm_add_uint128 (ctr, two);
+	  b3 = asm_add_uint128 (ctr, three);
+	  b0 = asm_xor (rkey0, ctr);
+	  ctr = asm_add_uint128 (ctr, four);
+	  b1 = asm_xor (rkey0, b1);
+	  b2 = asm_xor (rkey0, b2);
+	  b3 = asm_xor (rkey0, b3);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+
+	  in0 = VEC_LOAD_BE (in, 0, bige_const);
+	  in1 = VEC_LOAD_BE (in, 1, bige_const);
+	  in2 = VEC_LOAD_BE (in, 2, bige_const);
+	  in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  in0 = asm_xor (rkeylast, in0);
+	  in1 = asm_xor (rkeylast, in1);
+	  in2 = asm_xor (rkeylast, in2);
+	  in3 = asm_xor (rkeylast, in3);
+
+	  b0 = asm_cipherlast_be (b0, in0);
+	  b1 = asm_cipherlast_be (b1, in1);
+	  b2 = asm_cipherlast_be (b2, in2);
+	  b3 = asm_cipherlast_be (b3, in3);
+
+	  VEC_STORE_BE (out, 0, b0, bige_const);
+	  VEC_STORE_BE (out, 1, b1, bige_const);
+	  VEC_STORE_BE (out, 2, b2, bige_const);
+	  VEC_STORE_BE (out, 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      b = ctr;
+      ctr = asm_add_uint128 (ctr, one);
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+
+      AES_ENCRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
+}
+
+
+size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
+		       const void *inbuf_arg, size_t nblocks,
+		       int encrypt)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  u64 data_nblocks = c->u_mode.ocb.data_nblocks;
+  block l0, l1, l2, l;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b;
+  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+  block rkey, rkeylf;
+  block ctr, iv;
+  ROUND_KEY_VARIABLES;
+
+  iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
+  ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);
+
+  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+  if (encrypt)
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+	{
+	  l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+	  b = VEC_LOAD_BE (in, 0, bige_const);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  iv ^= l;
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  ctr ^= b;
+	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+	  b ^= iv;
+	  AES_ENCRYPT (b, rounds);
+	  b ^= iv;
+
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in += 1;
+	  out += 1;
+	}
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+	  b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+	  b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+	  b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+	  b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+	  b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+	  b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+	  b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+	  in += 8;
+	  l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+	  b0 = VEC_BE_SWAP(b0, bige_const);
+	  b1 = VEC_BE_SWAP(b1, bige_const);
+	  b2 = VEC_BE_SWAP(b2, bige_const);
+	  b3 = VEC_BE_SWAP(b3, bige_const);
+	  b4 = VEC_BE_SWAP(b4, bige_const);
+	  b5 = VEC_BE_SWAP(b5, bige_const);
+	  b6 = VEC_BE_SWAP(b6, bige_const);
+	  b7 = VEC_BE_SWAP(b7, bige_const);
+	  l = VEC_BE_SWAP(l, bige_const);
+
+	  ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+	  iv ^= rkey0;
+
+	  iv0 = iv ^ l0;
+	  iv1 = iv ^ l0 ^ l1;
+	  iv2 = iv ^ l1;
+	  iv3 = iv ^ l1 ^ l2;
+	  iv4 = iv ^ l1 ^ l2 ^ l0;
+	  iv5 = iv ^ l2 ^ l0;
+	  iv6 = iv ^ l2;
+	  iv7 = iv ^ l2 ^ l;
+
+	  b0 ^= iv0;
+	  b1 ^= iv1;
+	  b2 ^= iv2;
+	  b3 ^= iv3;
+	  b4 ^= iv4;
+	  b5 ^= iv5;
+	  b6 ^= iv6;
+	  b7 ^= iv7;
+	  iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey); \
+	      b4 = asm_cipher_be (b4, rkey); \
+	      b5 = asm_cipher_be (b5, rkey); \
+	      b6 = asm_cipher_be (b6, rkey); \
+	      b7 = asm_cipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+
+	  rkeylf = asm_xor (rkeylast, rkey0);
+
+	  DO_ROUND(8);
+
+	  iv0 = asm_xor (rkeylf, iv0);
+	  iv1 = asm_xor (rkeylf, iv1);
+	  iv2 = asm_xor (rkeylf, iv2);
+	  iv3 = asm_xor (rkeylf, iv3);
+	  iv4 = asm_xor (rkeylf, iv4);
+	  iv5 = asm_xor (rkeylf, iv5);
+	  iv6 = asm_xor (rkeylf, iv6);
+	  iv7 = asm_xor (rkeylf, iv7);
+
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  b0 = asm_cipherlast_be (b0, iv0);
+	  b1 = asm_cipherlast_be (b1, iv1);
+	  b2 = asm_cipherlast_be (b2, iv2);
+	  b3 = asm_cipherlast_be (b3, iv3);
+	  b4 = asm_cipherlast_be (b4, iv4);
+	  b5 = asm_cipherlast_be (b5, iv5);
+	  b6 = asm_cipherlast_be (b6, iv6);
+	  b7 = asm_cipherlast_be (b7, iv7);
+
+	  b0 = VEC_BE_SWAP (b0, bige_const);
+	  b1 = VEC_BE_SWAP (b1, bige_const);
+	  b2 = VEC_BE_SWAP (b2, bige_const);
+	  b3 = VEC_BE_SWAP (b3, bige_const);
+	  b4 = VEC_BE_SWAP (b4, bige_const);
+	  b5 = VEC_BE_SWAP (b5, bige_const);
+	  b6 = VEC_BE_SWAP (b6, bige_const);
+	  b7 = VEC_BE_SWAP (b7, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 0, b0);
+	  VEC_STORE_BE_NOSWAP (out, 1, b1);
+	  VEC_STORE_BE_NOSWAP (out, 2, b2);
+	  VEC_STORE_BE_NOSWAP (out, 3, b3);
+	  VEC_STORE_BE_NOSWAP (out, 4, b4);
+	  VEC_STORE_BE_NOSWAP (out, 5, b5);
+	  VEC_STORE_BE_NOSWAP (out, 6, b6);
+	  VEC_STORE_BE_NOSWAP (out, 7, b7);
+	  out += 8;
+	}
+
+      if (nblocks >= 4 && (data_nblocks % 4) == 0)
+	{
+	  b0 = VEC_LOAD_BE (in, 0, bige_const);
+	  b1 = VEC_LOAD_BE (in, 1, bige_const);
+	  b2 = VEC_LOAD_BE (in, 2, bige_const);
+	  b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+	  l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+	  ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+	  iv ^= rkey0;
+
+	  iv0 = iv ^ l0;
+	  iv1 = iv ^ l0 ^ l1;
+	  iv2 = iv ^ l1;
+	  iv3 = iv ^ l1 ^ l;
+
+	  b0 ^= iv0;
+	  b1 ^= iv1;
+	  b2 ^= iv2;
+	  b3 ^= iv3;
+	  iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast ^ rkey0;
+	  b0 = asm_cipherlast_be (b0, rkey ^ iv0);
+	  b1 = asm_cipherlast_be (b1, rkey ^ iv1);
+	  b2 = asm_cipherlast_be (b2, rkey ^ iv2);
+	  b3 = asm_cipherlast_be (b3, rkey ^ iv3);
+
+	  VEC_STORE_BE (out, 0, b0, bige_const);
+	  VEC_STORE_BE (out, 1, b1, bige_const);
+	  VEC_STORE_BE (out, 2, b2, bige_const);
+	  VEC_STORE_BE (out, 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+	  b = VEC_LOAD_BE (in, 0, bige_const);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  iv ^= l;
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  ctr ^= b;
+	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+	  b ^= iv;
+	  AES_ENCRYPT (b, rounds);
+	  b ^= iv;
+
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in += 1;
+	  out += 1;
+	}
+    }
+  else
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+      if (!ctx->decryption_prepared)
+	{
+	  internal_aes_ppc_prepare_decryption (ctx);
+	  ctx->decryption_prepared = 1;
+	}
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+	{
+	  l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+	  b = VEC_LOAD_BE (in, 0, bige_const);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  iv ^= l;
+	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+	  b ^= iv;
+	  AES_DECRYPT (b, rounds);
+	  b ^= iv;
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  ctr ^= b;
+
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in += 1;
+	  out += 1;
+	}
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+	  b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+	  b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+	  b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+	  b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+	  b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+	  b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+	  b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+	  in += 8;
+	  l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+	  b0 = VEC_BE_SWAP(b0, bige_const);
+	  b1 = VEC_BE_SWAP(b1, bige_const);
+	  b2 = VEC_BE_SWAP(b2, bige_const);
+	  b3 = VEC_BE_SWAP(b3, bige_const);
+	  b4 = VEC_BE_SWAP(b4, bige_const);
+	  b5 = VEC_BE_SWAP(b5, bige_const);
+	  b6 = VEC_BE_SWAP(b6, bige_const);
+	  b7 = VEC_BE_SWAP(b7, bige_const);
+	  l = VEC_BE_SWAP(l, bige_const);
+
+	  iv ^= rkey0;
+
+	  iv0 = iv ^ l0;
+	  iv1 = iv ^ l0 ^ l1;
+	  iv2 = iv ^ l1;
+	  iv3 = iv ^ l1 ^ l2;
+	  iv4 = iv ^ l1 ^ l2 ^ l0;
+	  iv5 = iv ^ l2 ^ l0;
+	  iv6 = iv ^ l2;
+	  iv7 = iv ^ l2 ^ l;
+
+	  b0 ^= iv0;
+	  b1 ^= iv1;
+	  b2 ^= iv2;
+	  b3 ^= iv3;
+	  b4 ^= iv4;
+	  b5 ^= iv5;
+	  b6 ^= iv6;
+	  b7 ^= iv7;
+	  iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey); \
+	      b4 = asm_ncipher_be (b4, rkey); \
+	      b5 = asm_ncipher_be (b5, rkey); \
+	      b6 = asm_ncipher_be (b6, rkey); \
+	      b7 = asm_ncipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+
+	  rkeylf = asm_xor (rkeylast, rkey0);
+
+	  DO_ROUND(8);
+
+	  iv0 = asm_xor (rkeylf, iv0);
+	  iv1 = asm_xor (rkeylf, iv1);
+	  iv2 = asm_xor (rkeylf, iv2);
+	  iv3 = asm_xor (rkeylf, iv3);
+	  iv4 = asm_xor (rkeylf, iv4);
+	  iv5 = asm_xor (rkeylf, iv5);
+	  iv6 = asm_xor (rkeylf, iv6);
+	  iv7 = asm_xor (rkeylf, iv7);
+
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  b0 = asm_ncipherlast_be (b0, iv0);
+	  b1 = asm_ncipherlast_be (b1, iv1);
+	  b2 = asm_ncipherlast_be (b2, iv2);
+	  b3 = asm_ncipherlast_be (b3, iv3);
+	  b4 = asm_ncipherlast_be (b4, iv4);
+	  b5 = asm_ncipherlast_be (b5, iv5);
+	  b6 = asm_ncipherlast_be (b6, iv6);
+	  b7 = asm_ncipherlast_be (b7, iv7);
+
+	  ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+	  b0 = VEC_BE_SWAP (b0, bige_const);
+	  b1 = VEC_BE_SWAP (b1, bige_const);
+	  b2 = VEC_BE_SWAP (b2, bige_const);
+	  b3 = VEC_BE_SWAP (b3, bige_const);
+	  b4 = VEC_BE_SWAP (b4, bige_const);
+	  b5 = VEC_BE_SWAP (b5, bige_const);
+	  b6 = VEC_BE_SWAP (b6, bige_const);
+	  b7 = VEC_BE_SWAP (b7, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 0, b0);
+	  VEC_STORE_BE_NOSWAP (out, 1, b1);
+	  VEC_STORE_BE_NOSWAP (out, 2, b2);
+	  VEC_STORE_BE_NOSWAP (out, 3, b3);
+	  VEC_STORE_BE_NOSWAP (out, 4, b4);
+	  VEC_STORE_BE_NOSWAP (out, 5, b5);
+	  VEC_STORE_BE_NOSWAP (out, 6, b6);
+	  VEC_STORE_BE_NOSWAP (out, 7, b7);
+	  out += 8;
+	}
+
+      if (nblocks >= 4 && (data_nblocks % 4) == 0)
+	{
+	  b0 = VEC_LOAD_BE (in, 0, bige_const);
+	  b1 = VEC_LOAD_BE (in, 1, bige_const);
+	  b2 = VEC_LOAD_BE (in, 2, bige_const);
+	  b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+	  l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+	  iv ^= rkey0;
+
+	  iv0 = iv ^ l0;
+	  iv1 = iv ^ l0 ^ l1;
+	  iv2 = iv ^ l1;
+	  iv3 = iv ^ l1 ^ l;
+
+	  b0 ^= iv0;
+	  b1 ^= iv1;
+	  b2 ^= iv2;
+	  b3 ^= iv3;
+	  iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast ^ rkey0;
+	  b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
+	  b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
+	  b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
+	  b3 = asm_ncipherlast_be (b3, rkey ^ iv3);
+
+	  VEC_STORE_BE (out, 0, b0, bige_const);
+	  VEC_STORE_BE (out, 1, b1, bige_const);
+	  VEC_STORE_BE (out, 2, b2, bige_const);
+	  VEC_STORE_BE (out, 3, b3, bige_const);
+
+	  ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+	  b = VEC_LOAD_BE (in, 0, bige_const);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  iv ^= l;
+	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+	  b ^= iv;
+	  AES_DECRYPT (b, rounds);
+	  b ^= iv;
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  ctr ^= b;
+
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in += 1;
+	  out += 1;
+	}
+    }
+
+  VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
+  VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
+  c->u_mode.ocb.data_nblocks = data_nblocks;
+
+  return 0;
+}
+
+size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *abuf = (const u128_t *)abuf_arg;
+  int rounds = ctx->rounds;
+  u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
+  block l0, l1, l2, l;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b;
+  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+  block rkey, frkey;
+  block ctr, iv;
+  ROUND_KEY_VARIABLES;
+
+  iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
+  ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);
+
+  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+    {
+      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+      b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      iv ^= l;
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      b ^= iv;
+      AES_ENCRYPT (b, rounds);
+      ctr ^= b;
+
+      abuf += 1;
+    }
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+      b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+      b4 = VEC_LOAD_BE (abuf, 4, bige_const);
+      b5 = VEC_LOAD_BE (abuf, 5, bige_const);
+      b6 = VEC_LOAD_BE (abuf, 6, bige_const);
+      b7 = VEC_LOAD_BE (abuf, 7, bige_const);
+
+      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);
+
+      frkey = rkey0;
+      iv ^= frkey;
+
+      iv0 = iv ^ l0;
+      iv1 = iv ^ l0 ^ l1;
+      iv2 = iv ^ l1;
+      iv3 = iv ^ l1 ^ l2;
+      iv4 = iv ^ l1 ^ l2 ^ l0;
+      iv5 = iv ^ l2 ^ l0;
+      iv6 = iv ^ l2;
+      iv7 = iv ^ l2 ^ l;
+
+      b0 ^= iv0;
+      b1 ^= iv1;
+      b2 ^= iv2;
+      b3 ^= iv3;
+      b4 ^= iv4;
+      b5 ^= iv5;
+      b6 ^= iv6;
+      b7 ^= iv7;
+      iv = iv7 ^ frkey;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey); \
+	      b4 = asm_cipher_be (b4, rkey); \
+	      b5 = asm_cipher_be (b5, rkey); \
+	      b6 = asm_cipher_be (b6, rkey); \
+	      b7 = asm_cipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = asm_cipherlast_be (b0, rkey);
+      b1 = asm_cipherlast_be (b1, rkey);
+      b2 = asm_cipherlast_be (b2, rkey);
+      b3 = asm_cipherlast_be (b3, rkey);
+      b4 = asm_cipherlast_be (b4, rkey);
+      b5 = asm_cipherlast_be (b5, rkey);
+      b6 = asm_cipherlast_be (b6, rkey);
+      b7 = asm_cipherlast_be (b7, rkey);
+
+      ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+      abuf += 8;
+    }
+
+  if (nblocks >= 4 && (data_nblocks % 4) == 0)
+    {
+      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+      b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+
+      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+      frkey = rkey0;
+      iv ^= frkey;
+
+      iv0 = iv ^ l0;
+      iv1 = iv ^ l0 ^ l1;
+      iv2 = iv ^ l1;
+      iv3 = iv ^ l1 ^ l;
+
+      b0 ^= iv0;
+      b1 ^= iv1;
+      b2 ^= iv2;
+      b3 ^= iv3;
+      iv = iv3 ^ frkey;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = asm_cipherlast_be (b0, rkey);
+      b1 = asm_cipherlast_be (b1, rkey);
+      b2 = asm_cipherlast_be (b2, rkey);
+      b3 = asm_cipherlast_be (b3, rkey);
+
+      ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+      abuf += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+      b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      iv ^= l;
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      b ^= iv;
+      AES_ENCRYPT (b, rounds);
+      ctr ^= b;
+
+      abuf += 1;
+    }
+
+  VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
+  VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
+  c->u_mode.ocb.aad_nblocks = data_nblocks;
+
+  return 0;
+}
+
+
+void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
+		     void *outbuf_arg, const void *inbuf_arg,
+		     size_t nblocks, int encrypt)
+{
+#ifdef WORDS_BIGENDIAN
+  static const block vec_bswap128_const =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#else
+  static const block vec_bswap128_const =
+    { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
+#endif
+  static const unsigned char vec_tweak_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+  static const vector unsigned long long vec_shift63_const =
+    { 63, 63 };
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  block tweak;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
+  block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+  block tweak_const, bswap128_const, shift63_const;
+  ROUND_KEY_VARIABLES;
+
+  tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
+  bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
+  shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0);
+
+  tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
+  tweak = asm_vperm1 (tweak, bswap128_const);
+
+#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
+    do { \
+      block tmp1, tmp2; \
+      tmp1 = asm_swap_uint64_halfs(tin); \
+      tmp2 = asm_add_uint64(tin, tin); \
+      tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \
+      tout = asm_xor(tmp1, tmp2); \
+    } while (0)
+
+  if (encrypt)
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+	  b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+	  b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+	  b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+	  tweak0 = tweak;
+	  GEN_TWEAK (tweak1, tweak0);
+	  tweak0 = asm_vperm1 (tweak0, bswap128_const);
+	  b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+	  b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+	  GEN_TWEAK (tweak2, tweak1);
+	  tweak1 = asm_vperm1 (tweak1, bswap128_const);
+	  b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+	  b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+	  in += 8;
+
+	  b0 = VEC_BE_SWAP(b0, bige_const);
+	  b1 = VEC_BE_SWAP(b1, bige_const);
+	  GEN_TWEAK (tweak3, tweak2);
+	  tweak2 = asm_vperm1 (tweak2, bswap128_const);
+	  GEN_TWEAK (tweak4, tweak3);
+	  tweak3 = asm_vperm1 (tweak3, bswap128_const);
+	  b2 = VEC_BE_SWAP(b2, bige_const);
+	  b3 = VEC_BE_SWAP(b3, bige_const);
+	  GEN_TWEAK (tweak5, tweak4);
+	  tweak4 = asm_vperm1 (tweak4, bswap128_const);
+	  GEN_TWEAK (tweak6, tweak5);
+	  tweak5 = asm_vperm1 (tweak5, bswap128_const);
+	  b4 = VEC_BE_SWAP(b4, bige_const);
+	  b5 = VEC_BE_SWAP(b5, bige_const);
+	  GEN_TWEAK (tweak7, tweak6);
+	  tweak6 = asm_vperm1 (tweak6, bswap128_const);
+	  GEN_TWEAK (tweak, tweak7);
+	  tweak7 = asm_vperm1 (tweak7, bswap128_const);
+	  b6 = VEC_BE_SWAP(b6, bige_const);
+	  b7 = VEC_BE_SWAP(b7, bige_const);
+
+	  tweak0 = asm_xor (tweak0, rkey0);
+	  tweak1 = asm_xor (tweak1, rkey0);
+	  tweak2 = asm_xor (tweak2, rkey0);
+	  tweak3 = asm_xor (tweak3, rkey0);
+	  tweak4 = asm_xor (tweak4, rkey0);
+	  tweak5 = asm_xor (tweak5, rkey0);
+	  tweak6 = asm_xor (tweak6, rkey0);
+	  tweak7 = asm_xor (tweak7, rkey0);
+
+	  b0 = asm_xor (b0, tweak0);
+	  b1 = asm_xor (b1, tweak1);
+	  b2 = asm_xor (b2, tweak2);
+	  b3 = asm_xor (b3, tweak3);
+	  b4 = asm_xor (b4, tweak4);
+	  b5 = asm_xor (b5, tweak5);
+	  b6 = asm_xor (b6, tweak6);
+	  b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey); \
+	      b4 = asm_cipher_be (b4, rkey); \
+	      b5 = asm_cipher_be (b5, rkey); \
+	      b6 = asm_cipher_be (b6, rkey); \
+	      b7 = asm_cipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+
+	  rkeylf = asm_xor (rkeylast, rkey0);
+
+	  DO_ROUND(8);
+
+	  tweak0 = asm_xor (tweak0, rkeylf);
+	  tweak1 = asm_xor (tweak1, rkeylf);
+	  tweak2 = asm_xor (tweak2, rkeylf);
+	  tweak3 = asm_xor (tweak3, rkeylf);
+	  tweak4 = asm_xor (tweak4, rkeylf);
+	  tweak5 = asm_xor (tweak5, rkeylf);
+	  tweak6 = asm_xor (tweak6, rkeylf);
+	  tweak7 = asm_xor (tweak7, rkeylf);
+
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  b0 = asm_cipherlast_be (b0, tweak0);
+	  b1 = asm_cipherlast_be (b1, tweak1);
+	  b2 = asm_cipherlast_be (b2, tweak2);
+	  b3 = asm_cipherlast_be (b3, tweak3);
+	  b0 = VEC_BE_SWAP (b0, bige_const);
+	  b1 = VEC_BE_SWAP (b1, bige_const);
+	  b4 = asm_cipherlast_be (b4, tweak4);
+	  b5 = asm_cipherlast_be (b5, tweak5);
+	  b2 = VEC_BE_SWAP (b2, bige_const);
+	  b3 = VEC_BE_SWAP (b3, bige_const);
+	  b6 = asm_cipherlast_be (b6, tweak6);
+	  b7 = asm_cipherlast_be (b7, tweak7);
+	  VEC_STORE_BE_NOSWAP (out, 0, b0);
+	  VEC_STORE_BE_NOSWAP (out, 1, b1);
+	  b4 = VEC_BE_SWAP (b4, bige_const);
+	  b5 = VEC_BE_SWAP (b5, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 2, b2);
+	  VEC_STORE_BE_NOSWAP (out, 3, b3);
+	  b6 = VEC_BE_SWAP (b6, bige_const);
+	  b7 = VEC_BE_SWAP (b7, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 4, b4);
+	  VEC_STORE_BE_NOSWAP (out, 5, b5);
+	  VEC_STORE_BE_NOSWAP (out, 6, b6);
+	  VEC_STORE_BE_NOSWAP (out, 7, b7);
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  tweak0 = tweak;
+	  GEN_TWEAK (tweak1, tweak0);
+	  GEN_TWEAK (tweak2, tweak1);
+	  GEN_TWEAK (tweak3, tweak2);
+	  GEN_TWEAK (tweak, tweak3);
+
+	  b0 = VEC_LOAD_BE (in, 0, bige_const);
+	  b1 = VEC_LOAD_BE (in, 1, bige_const);
+	  b2 = VEC_LOAD_BE (in, 2, bige_const);
+	  b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+	  tweak0 = asm_vperm1 (tweak0, bswap128_const);
+	  tweak1 = asm_vperm1 (tweak1, bswap128_const);
+	  tweak2 = asm_vperm1 (tweak2, bswap128_const);
+	  tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+	  b0 ^= tweak0 ^ rkey0;
+	  b1 ^= tweak1 ^ rkey0;
+	  b2 ^= tweak2 ^ rkey0;
+	  b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_cipher_be (b0, rkey); \
+	      b1 = asm_cipher_be (b1, rkey); \
+	      b2 = asm_cipher_be (b2, rkey); \
+	      b3 = asm_cipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
+	  b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
+	  b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
+	  b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
+
+	  VEC_STORE_BE (out, 0, b0, bige_const);
+	  VEC_STORE_BE (out, 1, b1, bige_const);
+	  VEC_STORE_BE (out, 2, b2, bige_const);
+	  VEC_STORE_BE (out, 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+	  /* Xor-Encrypt/Decrypt-Xor block. */
+	  b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+	  /* Generate next tweak. */
+	  GEN_TWEAK (tweak, tweak);
+
+	  AES_ENCRYPT (b, rounds);
+
+	  b ^= tweak0;
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in++;
+	  out++;
+	}
+    }
+  else
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+      if (!ctx->decryption_prepared)
+	{
+	  internal_aes_ppc_prepare_decryption (ctx);
+	  ctx->decryption_prepared = 1;
+	}
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+	  b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+	  b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+	  b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+	  tweak0 = tweak;
+	  GEN_TWEAK (tweak1, tweak0);
+	  tweak0 = asm_vperm1 (tweak0, bswap128_const);
+	  b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+	  b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+	  GEN_TWEAK (tweak2, tweak1);
+	  tweak1 = asm_vperm1 (tweak1, bswap128_const);
+	  b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+	  b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+	  in += 8;
+
+	  b0 = VEC_BE_SWAP(b0, bige_const);
+	  b1 = VEC_BE_SWAP(b1, bige_const);
+	  GEN_TWEAK (tweak3, tweak2);
+	  tweak2 = asm_vperm1 (tweak2, bswap128_const);
+	  GEN_TWEAK (tweak4, tweak3);
+	  tweak3 = asm_vperm1 (tweak3, bswap128_const);
+	  b2 = VEC_BE_SWAP(b2, bige_const);
+	  b3 = VEC_BE_SWAP(b3, bige_const);
+	  GEN_TWEAK (tweak5, tweak4);
+	  tweak4 = asm_vperm1 (tweak4, bswap128_const);
+	  GEN_TWEAK (tweak6, tweak5);
+	  tweak5 = asm_vperm1 (tweak5, bswap128_const);
+	  b4 = VEC_BE_SWAP(b4, bige_const);
+	  b5 = VEC_BE_SWAP(b5, bige_const);
+	  GEN_TWEAK (tweak7, tweak6);
+	  tweak6 = asm_vperm1 (tweak6, bswap128_const);
+	  GEN_TWEAK (tweak, tweak7);
+	  tweak7 = asm_vperm1 (tweak7, bswap128_const);
+	  b6 = VEC_BE_SWAP(b6, bige_const);
+	  b7 = VEC_BE_SWAP(b7, bige_const);
+
+	  tweak0 = asm_xor (tweak0, rkey0);
+	  tweak1 = asm_xor (tweak1, rkey0);
+	  tweak2 = asm_xor (tweak2, rkey0);
+	  tweak3 = asm_xor (tweak3, rkey0);
+	  tweak4 = asm_xor (tweak4, rkey0);
+	  tweak5 = asm_xor (tweak5, rkey0);
+	  tweak6 = asm_xor (tweak6, rkey0);
+	  tweak7 = asm_xor (tweak7, rkey0);
+
+	  b0 = asm_xor (b0, tweak0);
+	  b1 = asm_xor (b1, tweak1);
+	  b2 = asm_xor (b2, tweak2);
+	  b3 = asm_xor (b3, tweak3);
+	  b4 = asm_xor (b4, tweak4);
+	  b5 = asm_xor (b5, tweak5);
+	  b6 = asm_xor (b6, tweak6);
+	  b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey); \
+	      b4 = asm_ncipher_be (b4, rkey); \
+	      b5 = asm_ncipher_be (b5, rkey); \
+	      b6 = asm_ncipher_be (b6, rkey); \
+	      b7 = asm_ncipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+
+	  rkeylf = asm_xor (rkeylast, rkey0);
+
+	  DO_ROUND(8);
+
+	  tweak0 = asm_xor (tweak0, rkeylf);
+	  tweak1 = asm_xor (tweak1, rkeylf);
+	  tweak2 = asm_xor (tweak2, rkeylf);
+	  tweak3 = asm_xor (tweak3, rkeylf);
+	  tweak4 = asm_xor (tweak4, rkeylf);
+	  tweak5 = asm_xor (tweak5, rkeylf);
+	  tweak6 = asm_xor (tweak6, rkeylf);
+	  tweak7 = asm_xor (tweak7, rkeylf);
+
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  b0 = asm_ncipherlast_be (b0, tweak0);
+	  b1 = asm_ncipherlast_be (b1, tweak1);
+	  b2 = asm_ncipherlast_be (b2, tweak2);
+	  b3 = asm_ncipherlast_be (b3, tweak3);
+	  b0 = VEC_BE_SWAP (b0, bige_const);
+	  b1 = VEC_BE_SWAP (b1, bige_const);
+	  b4 = asm_ncipherlast_be (b4, tweak4);
+	  b5 = asm_ncipherlast_be (b5, tweak5);
+	  b2 = VEC_BE_SWAP (b2, bige_const);
+	  b3 = VEC_BE_SWAP (b3, bige_const);
+	  b6 = asm_ncipherlast_be (b6, tweak6);
+	  b7 = asm_ncipherlast_be (b7, tweak7);
+	  VEC_STORE_BE_NOSWAP (out, 0, b0);
+	  VEC_STORE_BE_NOSWAP (out, 1, b1);
+	  b4 = VEC_BE_SWAP (b4, bige_const);
+	  b5 = VEC_BE_SWAP (b5, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 2, b2);
+	  VEC_STORE_BE_NOSWAP (out, 3, b3);
+	  b6 = VEC_BE_SWAP (b6, bige_const);
+	  b7 = VEC_BE_SWAP (b7, bige_const);
+	  VEC_STORE_BE_NOSWAP (out, 4, b4);
+	  VEC_STORE_BE_NOSWAP (out, 5, b5);
+	  VEC_STORE_BE_NOSWAP (out, 6, b6);
+	  VEC_STORE_BE_NOSWAP (out, 7, b7);
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  tweak0 = tweak;
+	  GEN_TWEAK (tweak1, tweak0);
+	  GEN_TWEAK (tweak2, tweak1);
+	  GEN_TWEAK (tweak3, tweak2);
+	  GEN_TWEAK (tweak, tweak3);
+
+	  b0 = VEC_LOAD_BE (in, 0, bige_const);
+	  b1 = VEC_LOAD_BE (in, 1, bige_const);
+	  b2 = VEC_LOAD_BE (in, 2, bige_const);
+	  b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+	  tweak0 = asm_vperm1 (tweak0, bswap128_const);
+	  tweak1 = asm_vperm1 (tweak1, bswap128_const);
+	  tweak2 = asm_vperm1 (tweak2, bswap128_const);
+	  tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+	  b0 ^= tweak0 ^ rkey0;
+	  b1 ^= tweak1 ^ rkey0;
+	  b2 ^= tweak2 ^ rkey0;
+	  b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (rk, r); \
+	      b0 = asm_ncipher_be (b0, rkey); \
+	      b1 = asm_ncipher_be (b1, rkey); \
+	      b2 = asm_ncipher_be (b2, rkey); \
+	      b3 = asm_ncipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
+	  b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
+	  b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
+	  b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);
+
+	  VEC_STORE_BE (out, 0, b0, bige_const);
+	  VEC_STORE_BE (out, 1, b1, bige_const);
+	  VEC_STORE_BE (out, 2, b2, bige_const);
+	  VEC_STORE_BE (out, 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+	  /* Xor-Encrypt/Decrypt-Xor block. */
+	  b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+	  /* Generate next tweak. */
+	  GEN_TWEAK (tweak, tweak);
+
+	  AES_DECRYPT (b, rounds);
+
+	  b ^= tweak0;
+	  VEC_STORE_BE (out, 0, b, bige_const);
+
+	  in++;
+	  out++;
+	}
+    }
+
+  tweak = asm_vperm1 (tweak, bswap128_const);
+  VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
+
+#undef GEN_TWEAK
+}
diff -up libgcrypt-1.8.5/cipher/rijndael-ssse3-amd64.c.aes-perf libgcrypt-1.8.5/cipher/rijndael-ssse3-amd64.c
--- libgcrypt-1.8.5/cipher/rijndael-ssse3-amd64.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/rijndael-ssse3-amd64.c	2020-04-22 18:29:41.679862057 +0200
@@ -208,11 +208,11 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_cont
 
 
 /* Make a decryption key from an encryption key. */
-void
-_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+static inline void
+do_ssse3_prepare_decryption (RIJNDAEL_context *ctx,
+                             byte ssse3_state[SSSE3_STATE_SIZE])
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
-  byte ssse3_state[SSSE3_STATE_SIZE];
 
   vpaes_ssse3_prepare();
 
@@ -237,6 +237,14 @@ _gcry_aes_ssse3_prepare_decryption (RIJN
   vpaes_ssse3_cleanup();
 }
 
+void
+_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  do_ssse3_prepare_decryption(ctx, ssse3_state);
+}
+
 
 /* Encrypt one block using the Intel SSSE3 instructions.  Block is input
 * and output through SSE register xmm0. */
@@ -295,9 +303,9 @@ _gcry_aes_ssse3_encrypt (const RIJNDAEL_
 
 
 void
-_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -334,9 +342,9 @@ _gcry_aes_ssse3_cfb_enc (RIJNDAEL_contex
 
 
 void
-_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks, int cbc_mac)
+_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks, int cbc_mac)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -379,9 +387,9 @@ _gcry_aes_ssse3_cbc_enc (RIJNDAEL_contex
 
 
 void
-_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *ctr,
-                        size_t nblocks)
+_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
@@ -447,7 +455,7 @@ _gcry_aes_ssse3_ctr_enc (RIJNDAEL_contex
 
 unsigned int
 _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
-                        const unsigned char *src)
+                         const unsigned char *src)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -468,9 +476,9 @@ _gcry_aes_ssse3_decrypt (const RIJNDAEL_
 
 
 void
-_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -508,13 +516,19 @@ _gcry_aes_ssse3_cfb_dec (RIJNDAEL_contex
 
 
 void
-_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
   vpaes_ssse3_prepare_dec ();
 
   asm volatile ("movdqu %[iv], %%xmm7\n\t"	/* use xmm7 as fast IV storage */
@@ -626,6 +640,12 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
   vpaes_ssse3_prepare_dec ();
 
   /* Preload Offset and Checksum */
@@ -679,7 +699,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void
 }
 
 
-void
+size_t
 _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
                           const void *inbuf_arg, size_t nblocks, int encrypt)
 {
@@ -687,10 +707,12 @@ _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd
     ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
   else
     ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+  return 0;
 }
 
 
-void
+size_t
 _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
@@ -746,6 +768,8 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd
                 : "memory" );
 
   vpaes_ssse3_cleanup ();
+
+  return 0;
 }
 
 #endif /* USE_SSSE3 */
diff -up libgcrypt-1.8.5/cipher/salsa20.c.aes-perf libgcrypt-1.8.5/cipher/salsa20.c
--- libgcrypt-1.8.5/cipher/salsa20.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/salsa20.c	2020-04-22 18:29:41.679862057 +0200
@@ -366,10 +366,12 @@ salsa20_do_setkey (SALSA20_context_t *ct
 
 
 static gcry_err_code_t
-salsa20_setkey (void *context, const byte *key, unsigned int keylen)
+salsa20_setkey (void *context, const byte *key, unsigned int keylen,
+                gcry_cipher_hd_t hd)
 {
   SALSA20_context_t *ctx = (SALSA20_context_t *)context;
   gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen);
+  (void)hd;
   _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
   return rc;
 }
@@ -522,7 +524,7 @@ selftest (void)
   /* 16-byte alignment required for amd64 implementation. */
   ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
 
-  salsa20_setkey (ctx, key_1, sizeof key_1);
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
   salsa20_setiv  (ctx, nonce_1, sizeof nonce_1);
   scratch[8] = 0;
   salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
@@ -530,7 +532,7 @@ selftest (void)
     return "Salsa20 encryption test 1 failed.";
   if (scratch[8])
     return "Salsa20 wrote too much.";
-  salsa20_setkey( ctx, key_1, sizeof(key_1));
+  salsa20_setkey( ctx, key_1, sizeof(key_1), NULL);
   salsa20_setiv  (ctx, nonce_1, sizeof nonce_1);
   salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
   if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
@@ -538,12 +540,12 @@ selftest (void)
 
   for (i = 0; i < sizeof buf; i++)
     buf[i] = i;
-  salsa20_setkey (ctx, key_1, sizeof key_1);
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
   salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
   /*encrypt*/
   salsa20_encrypt_stream (ctx, buf, buf, sizeof buf);
   /*decrypt*/
-  salsa20_setkey (ctx, key_1, sizeof key_1);
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
   salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
   salsa20_encrypt_stream (ctx, buf, buf, 1);
   salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1);
diff -up libgcrypt-1.8.5/cipher/seed.c.aes-perf libgcrypt-1.8.5/cipher/seed.c
--- libgcrypt-1.8.5/cipher/seed.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/seed.c	2020-04-22 18:29:41.680862038 +0200
@@ -309,11 +309,12 @@ do_setkey (SEED_context *ctx, const byte
 }
 
 static gcry_err_code_t
-seed_setkey (void *context, const byte *key, const unsigned keylen)
+seed_setkey (void *context, const byte *key, const unsigned keylen,
+             gcry_cipher_hd_t hd)
 {
   SEED_context *ctx = context;
-
   int rc = do_setkey (ctx, key, keylen);
+  (void)hd;
   _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2);
   return rc;
 }
@@ -446,7 +447,7 @@ selftest (void)
     0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A,
   };
 
-  seed_setkey (&ctx, key, sizeof(key));
+  seed_setkey (&ctx, key, sizeof(key), NULL);
   seed_encrypt (&ctx, scratch, plaintext);
   if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
     return "SEED test encryption failed.";
diff -up libgcrypt-1.8.5/cipher/serpent.c.aes-perf libgcrypt-1.8.5/cipher/serpent.c
--- libgcrypt-1.8.5/cipher/serpent.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/serpent.c	2020-04-22 18:29:41.680862038 +0200
@@ -748,13 +748,16 @@ serpent_setkey_internal (serpent_context
 /* Initialize CTX with the key KEY of KEY_LENGTH bytes.  */
 static gcry_err_code_t
 serpent_setkey (void *ctx,
-		const byte *key, unsigned int key_length)
+		const byte *key, unsigned int key_length,
+                gcry_cipher_hd_t hd)
 {
   serpent_context_t *context = ctx;
   static const char *serpent_test_ret;
   static int serpent_init_done;
   gcry_err_code_t ret = GPG_ERR_NO_ERROR;
 
+  (void)hd;
+
   if (! serpent_init_done)
     {
       /* Execute a self-test the first time, Serpent is used.  */
@@ -999,7 +1002,7 @@ _gcry_serpent_ctr_enc(void *context, uns
       /* Encrypt the counter. */
       serpent_encrypt_internal(ctx, ctr, tmpbuf);
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
       outbuf += sizeof(serpent_block_t);
       inbuf  += sizeof(serpent_block_t);
       /* Increment the counter.  */
@@ -1114,7 +1117,8 @@ _gcry_serpent_cbc_dec(void *context, uns
          the intermediate result to SAVEBUF.  */
       serpent_decrypt_internal (ctx, inbuf, savebuf);
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                sizeof(serpent_block_t));
       inbuf += sizeof(serpent_block_t);
       outbuf += sizeof(serpent_block_t);
     }
@@ -1218,7 +1222,7 @@ _gcry_serpent_cfb_dec(void *context, uns
   for ( ;nblocks; nblocks-- )
     {
       serpent_encrypt_internal(ctx, iv, iv);
-      buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
       outbuf += sizeof(serpent_block_t);
       inbuf  += sizeof(serpent_block_t);
     }
diff -up libgcrypt-1.8.5/cipher/twofish.c.aes-perf libgcrypt-1.8.5/cipher/twofish.c
--- libgcrypt-1.8.5/cipher/twofish.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/cipher/twofish.c	2020-04-22 18:29:41.680862038 +0200
@@ -734,12 +734,15 @@ do_twofish_setkey (TWOFISH_context *ctx,
 }
 
 static gcry_err_code_t
-twofish_setkey (void *context, const byte *key, unsigned int keylen)
+twofish_setkey (void *context, const byte *key, unsigned int keylen,
+                gcry_cipher_hd_t hd)
 {
   TWOFISH_context *ctx = context;
   unsigned int hwfeatures = _gcry_get_hw_features ();
   int rc;
 
+  (void)hd;
+
   rc = do_twofish_setkey (ctx, key, keylen);
 
 #ifdef USE_AVX2
@@ -1245,7 +1248,7 @@ _gcry_twofish_ctr_enc(void *context, uns
         burn_stack_depth = burn;
 
       /* XOR the input with the encrypted counter and store in output.  */
-      buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
       outbuf += TWOFISH_BLOCKSIZE;
       inbuf  += TWOFISH_BLOCKSIZE;
       /* Increment the counter.  */
@@ -1327,7 +1330,7 @@ _gcry_twofish_cbc_dec(void *context, uns
       if (burn > burn_stack_depth)
         burn_stack_depth = burn;
 
-      buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
       inbuf += TWOFISH_BLOCKSIZE;
       outbuf += TWOFISH_BLOCKSIZE;
     }
@@ -1399,7 +1402,7 @@ _gcry_twofish_cfb_dec(void *context, uns
       if (burn > burn_stack_depth)
         burn_stack_depth = burn;
 
-      buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
       outbuf += TWOFISH_BLOCKSIZE;
       inbuf += TWOFISH_BLOCKSIZE;
     }
@@ -1710,7 +1713,7 @@ selftest (void)
     0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
   };
 
-  twofish_setkey (&ctx, key, sizeof(key));
+  twofish_setkey (&ctx, key, sizeof(key), NULL);
   twofish_encrypt (&ctx, scratch, plaintext);
   if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
     return "Twofish-128 test encryption failed.";
@@ -1718,7 +1721,7 @@ selftest (void)
   if (memcmp (scratch, plaintext, sizeof (plaintext)))
     return "Twofish-128 test decryption failed.";
 
-  twofish_setkey (&ctx, key_256, sizeof(key_256));
+  twofish_setkey (&ctx, key_256, sizeof(key_256), NULL);
   twofish_encrypt (&ctx, scratch, plaintext_256);
   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
     return "Twofish-256 test encryption failed.";
@@ -1800,13 +1803,13 @@ main()
   /* Encryption test. */
   for (i = 0; i < 125; i++)
     {
-      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
+      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), NULL);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[2], buffer[2]);
-      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
+      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), NULL);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[3], buffer[3]);
-      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
+      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, NULL);
       for (j = 0; j < 1000; j++) {
         twofish_encrypt (&ctx, buffer[0], buffer[0]);
         twofish_encrypt (&ctx, buffer[1], buffer[1]);
@@ -1818,15 +1821,15 @@ main()
   /* Decryption test. */
   for (i = 0; i < 125; i++)
     {
-      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
+      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, NULL);
       for (j = 0; j < 1000; j++) {
         twofish_decrypt (&ctx, buffer[0], buffer[0]);
         twofish_decrypt (&ctx, buffer[1], buffer[1]);
       }
-      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
+      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), NULL);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[3], buffer[3]);
-      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
+      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), NULL);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[2], buffer[2]);
     }
diff -up libgcrypt-1.8.5/configure.ac.aes-perf libgcrypt-1.8.5/configure.ac
--- libgcrypt-1.8.5/configure.ac.aes-perf	2020-04-22 18:29:41.655862516 +0200
+++ libgcrypt-1.8.5/configure.ac	2020-04-22 18:29:41.681862019 +0200
@@ -649,6 +649,14 @@ AC_ARG_ENABLE(arm-crypto-support,
 	      armcryptosupport=$enableval,armcryptosupport=yes)
 AC_MSG_RESULT($armcryptosupport)
 
+# Implementation of the --disable-ppc-crypto-support switch.
+AC_MSG_CHECKING([whether PPC crypto support is requested])
+AC_ARG_ENABLE(ppc-crypto-support,
+              AC_HELP_STRING([--disable-ppc-crypto-support],
+                 [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]),
+              ppccryptosupport=$enableval,ppccryptosupport=yes)
+AC_MSG_RESULT($ppccryptosupport)
+
 # Implementation of the --disable-O-flag-munging switch.
 AC_MSG_CHECKING([whether a -O flag munging is requested])
 AC_ARG_ENABLE([O-flag-munging],
@@ -1196,6 +1204,9 @@ if test "$mpi_cpu_arch" != "arm" ; then
    fi
 fi
 
+if test "$mpi_cpu_arch" != "ppc"; then
+   ppccryptosupport="n/a"
+fi
 
 #############################################
 ####                                     ####
@@ -1722,6 +1733,113 @@ if test "$gcry_cv_gcc_inline_asm_aarch64
 fi
 
 
+#
+# Check whether PowerPC AltiVec/VSX intrinsics
+#
+AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
+      [gcry_cv_cc_ppc_altivec],
+      [if test "$mpi_cpu_arch" != "ppc" ; then
+	gcry_cv_cc_ppc_altivec="n/a"
+      else
+	gcry_cv_cc_ppc_altivec=no
+	AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+	[[#include <altivec.h>
+	  typedef vector unsigned char block;
+	  block fn(block in)
+	  {
+	    block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+	    return vec_cipher_be (t, in);
+	  }
+	  ]])],
+	[gcry_cv_cc_ppc_altivec=yes])
+      fi])
+if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+	    [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
+fi
+
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -maltivec -mvsx -mcrypto"
+
+if test "$gcry_cv_cc_ppc_altivec" = "no" &&
+    test "$mpi_cpu_arch" = "ppc" ; then
+  AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags],
+    [gcry_cv_cc_ppc_altivec_cflags],
+    [gcry_cv_cc_ppc_altivec_cflags=no
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+      [[#include <altivec.h>
+	typedef vector unsigned char block;
+	block fn(block in)
+	{
+	  block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+	  return vec_cipher_be (t, in);
+	}]])],
+      [gcry_cv_cc_ppc_altivec_cflags=yes])])
+  if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+	      [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1,
+	      [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags])
+  fi
+fi
+
+AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS,
+	       test "$gcry_cv_cc_ppc_altivec_cflags" = "yes")
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions],
+       [gcry_cv_gcc_inline_asm_ppc_altivec],
+       [if test "$mpi_cpu_arch" != "ppc" ; then
+          gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_altivec=no
+          AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[__asm__(".globl testfn;\n"
+		    "testfn:\n"
+		    "stvx %v31,%r12,%r0;\n"
+		    "lvx  %v20,%r12,%r0;\n"
+		    "vcipher %v0, %v1, %v22;\n"
+		    "lxvw4x %vs32, %r0, %r1;\n"
+		    "vadduwm %v0, %v1, %v22;\n"
+		  );
+            ]])],
+          [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1,
+     [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports PowerISA 3.00 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions],
+       [gcry_cv_gcc_inline_asm_ppc_arch_3_00],
+       [if test "$mpi_cpu_arch" != "ppc" ; then
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
+          AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[__asm__(".globl testfn;\n"
+		    "testfn:\n"
+		    "stxvb16x %r1,%v12,%v30;\n"
+		  );
+            ]])],
+          [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1,
+     [Defined if inline assembler supports PowerISA 3.00 instructions])
+fi
+
+
 #######################################
 #### Checks for library functions. ####
 #######################################
@@ -1999,6 +2117,10 @@ if test x"$armcryptosupport" = xyes ; th
   AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
             [Enable support for ARMv8 Crypto Extension instructions.])
 fi
+if test x"$ppccryptosupport" = xyes ; then
+  AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1,
+            [Enable support for POWER 8 (PowerISA 2.07) crypto extension.])
+fi
 if test x"$jentsupport" = xyes ; then
   AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
             [Enable support for the jitter entropy collector.])
@@ -2106,6 +2228,21 @@ if test "$found" = "1" ; then
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo"
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo"
       ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc9le.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+      ;;
    esac
 
    case "$mpi_cpu_arch" in
@@ -2555,6 +2692,7 @@ case "$mpi_cpu_arch" in
         ;;
      ppc)
         AC_DEFINE(HAVE_CPU_ARCH_PPC, 1,   [Defined for PPC platforms])
+        GCRYPT_HWF_MODULES="hwf-ppc.lo"
         ;;
      arm)
         AC_DEFINE(HAVE_CPU_ARCH_ARM, 1,   [Defined for ARM platforms])
@@ -2653,6 +2791,7 @@ GCRY_MSG_SHOW([Try using Intel AVX:
 GCRY_MSG_SHOW([Try using Intel AVX2:     ],[$avx2support])
 GCRY_MSG_SHOW([Try using ARM NEON:       ],[$neonsupport])
 GCRY_MSG_SHOW([Try using ARMv8 crypto:   ],[$armcryptosupport])
+GCRY_MSG_SHOW([Try using PPC crypto:     ],[$ppccryptosupport])
 GCRY_MSG_SHOW([],[])
 
 if test "x${gpg_config_script_warn}" != x; then
diff -up libgcrypt-1.8.5/src/cipher.h.aes-perf libgcrypt-1.8.5/src/cipher.h
--- libgcrypt-1.8.5/src/cipher.h.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/src/cipher.h	2020-04-22 18:29:41.681862019 +0200
@@ -158,6 +158,9 @@ size_t _gcry_aes_ocb_crypt (gcry_cipher_
 			    const void *inbuf_arg, size_t nblocks, int encrypt);
 size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 			   size_t nblocks);
+void _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+			  void *outbuf_arg, const void *inbuf_arg,
+			  size_t nblocks, int encrypt);
 
 /*-- blowfish.c --*/
 void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv,
diff -up libgcrypt-1.8.5/src/cipher-proto.h.aes-perf libgcrypt-1.8.5/src/cipher-proto.h
--- libgcrypt-1.8.5/src/cipher-proto.h.aes-perf	2020-04-22 18:29:41.643862745 +0200
+++ libgcrypt-1.8.5/src/cipher-proto.h	2020-04-22 18:29:41.681862019 +0200
@@ -132,7 +132,8 @@ typedef struct gcry_pk_spec
 /* Type for the cipher_setkey function.  */
 typedef gcry_err_code_t (*gcry_cipher_setkey_t) (void *c,
 						 const unsigned char *key,
-						 unsigned keylen);
+						 unsigned keylen,
+						 gcry_cipher_hd_t hd);
 
 /* Type for the cipher_encrypt function.  */
 typedef unsigned int (*gcry_cipher_encrypt_t) (void *c,
diff -up libgcrypt-1.8.5/src/g10lib.h.aes-perf libgcrypt-1.8.5/src/g10lib.h
--- libgcrypt-1.8.5/src/g10lib.h.aes-perf	2020-04-22 18:29:41.660862420 +0200
+++ libgcrypt-1.8.5/src/g10lib.h	2020-04-22 18:50:46.990661309 +0200
@@ -233,7 +233,9 @@ char **_gcry_strtokenize (const char *st
 
 #define HWF_INTEL_RDTSC         (1 << 20)
 
-
+#define HWF_PPC_VCRYPTO         (1 << 22)
+#define HWF_PPC_ARCH_3_00       (1 << 23)
+#define HWF_PPC_ARCH_2_07       (1 << 24)
 
 gpg_err_code_t _gcry_disable_hw_feature (const char *name);
 void _gcry_detect_hw_features (void);
diff -up libgcrypt-1.8.5/src/hwf-common.h.aes-perf libgcrypt-1.8.5/src/hwf-common.h
--- libgcrypt-1.8.5/src/hwf-common.h.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/src/hwf-common.h	2020-04-22 18:29:41.682862000 +0200
@@ -22,6 +22,6 @@
 
 unsigned int _gcry_hwf_detect_x86 (void);
 unsigned int _gcry_hwf_detect_arm (void);
-
+unsigned int _gcry_hwf_detect_ppc (void);
 
 #endif /*HWF_COMMON_H*/
diff -up libgcrypt-1.8.5/src/hwfeatures.c.aes-perf libgcrypt-1.8.5/src/hwfeatures.c
--- libgcrypt-1.8.5/src/hwfeatures.c.aes-perf	2017-11-23 19:16:58.000000000 +0100
+++ libgcrypt-1.8.5/src/hwfeatures.c	2020-04-22 18:51:48.326487879 +0200
@@ -42,6 +42,7 @@ static struct
   const char *desc;
 } hwflist[] =
   {
+#if defined(HAVE_CPU_ARCH_X86)
     { HWF_PADLOCK_RNG,         "padlock-rng" },
     { HWF_PADLOCK_AES,         "padlock-aes" },
     { HWF_PADLOCK_SHA,         "padlock-sha" },
@@ -58,11 +59,17 @@ static struct
     { HWF_INTEL_AVX2,          "intel-avx2" },
     { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
     { HWF_INTEL_RDTSC,         "intel-rdtsc" },
+#elif defined(HAVE_CPU_ARCH_ARM)
     { HWF_ARM_NEON,            "arm-neon" },
     { HWF_ARM_AES,             "arm-aes" },
     { HWF_ARM_SHA1,            "arm-sha1" },
     { HWF_ARM_SHA2,            "arm-sha2" },
-    { HWF_ARM_PMULL,           "arm-pmull" }
+    { HWF_ARM_PMULL,           "arm-pmull" },
+#elif defined(HAVE_CPU_ARCH_PPC)
+    { HWF_PPC_VCRYPTO,         "ppc-vcrypto" },
+    { HWF_PPC_ARCH_3_00,       "ppc-arch_3_00" },
+    { HWF_PPC_ARCH_2_07,       "ppc-arch_2_07" },
+#endif
   };
 
 /* A bit vector with the hardware features which shall not be used.
@@ -207,12 +214,14 @@ _gcry_detect_hw_features (void)
   {
     hw_features = _gcry_hwf_detect_x86 ();
   }
-#endif /* HAVE_CPU_ARCH_X86 */
-#if defined (HAVE_CPU_ARCH_ARM)
+#elif defined (HAVE_CPU_ARCH_ARM)
   {
     hw_features = _gcry_hwf_detect_arm ();
   }
-#endif /* HAVE_CPU_ARCH_ARM */
-
+#elif defined (HAVE_CPU_ARCH_PPC)
+  {
+    hw_features = _gcry_hwf_detect_ppc ();
+  }
+#endif
   hw_features &= ~disabled_hw_features;
 }
diff -up libgcrypt-1.8.5/src/hwf-ppc.c.aes-perf libgcrypt-1.8.5/src/hwf-ppc.c
--- libgcrypt-1.8.5/src/hwf-ppc.c.aes-perf	2020-04-22 18:29:41.682862000 +0200
+++ libgcrypt-1.8.5/src/hwf-ppc.c	2020-04-22 18:50:21.396150974 +0200
@@ -0,0 +1,243 @@
+/* hwf-ppc.c - Detect hardware features - PPC part
+ * Copyright (C) 2013,2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \
+    defined(HAVE_ELF_AUX_INFO))
+#include <sys/auxv.h>
+#endif
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__powerpc__) && !defined (__powerpc64__)
+# error Module build for wrong CPU.
+#endif
+
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \
+    !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP)
+#define HAVE_GETAUXVAL
+static unsigned long getauxval(unsigned long type)
+{
+  unsigned long auxval = 0;
+  int err;
+
+  /* FreeBSD provides 'elf_aux_info' function that does the same as
+   * 'getauxval' on Linux. */
+
+  err = elf_aux_info (type, &auxval, sizeof(auxval));
+  if (err)
+    {
+      errno = err;
+      auxval = 0;
+    }
+
+  return auxval;
+}
+#endif
+
+
+#undef HAS_SYS_AT_HWCAP
+#if defined(__linux__) || \
+    (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL))
+#define HAS_SYS_AT_HWCAP 1
+
+struct feature_map_s
+  {
+    unsigned int hwcap_flag;
+    unsigned int hwcap2_flag;
+    unsigned int hwf_flag;
+  };
+
+#if defined(__powerpc__) || defined(__powerpc64__)
+
+/* Note: These macros have same values on Linux and FreeBSD. */
+#ifndef AT_HWCAP
+# define AT_HWCAP      16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2     26
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_2_07
+# define PPC_FEATURE2_ARCH_2_07     0x80000000
+#endif
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+# define PPC_FEATURE2_VEC_CRYPTO    0x02000000
+#endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+# define PPC_FEATURE2_ARCH_3_00     0x00800000
+#endif
+
+static const struct feature_map_s ppc_features[] =
+  {
+    { 0, PPC_FEATURE2_ARCH_2_07, HWF_PPC_ARCH_2_07 },
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+    { 0, PPC_FEATURE2_VEC_CRYPTO, HWF_PPC_VCRYPTO },
+#endif
+    { 0, PPC_FEATURE2_ARCH_3_00, HWF_PPC_ARCH_3_00 },
+  };
+#endif
+
+static int
+get_hwcap(unsigned int *hwcap, unsigned int *hwcap2)
+{
+  struct { unsigned long a_type; unsigned long a_val; } auxv;
+  FILE *f;
+  int err = -1;
+  static int hwcap_initialized = 0;
+  static unsigned int stored_hwcap = 0;
+  static unsigned int stored_hwcap2 = 0;
+
+  if (hwcap_initialized)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+
+#if 0 // TODO: configure.ac detection for __builtin_cpu_supports
+      // TODO: move to 'detect_ppc_builtin_cpu_supports'
+#if defined(__GLIBC__) && defined(__GNUC__) && __GNUC__ >= 6
+  /* __builtin_cpu_supports returns 0 if glibc support doesn't exist, so
+   * we can only trust positive results. */
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+  if (__builtin_cpu_supports("vcrypto")) /* TODO: Configure.ac */
+    {
+      stored_hwcap2 |= PPC_FEATURE2_VEC_CRYPTO;
+      hwcap_initialized = 1;
+    }
+#endif
+
+  if (__builtin_cpu_supports("arch_3_00")) /* TODO: Configure.ac */
+    {
+      stored_hwcap2 |= PPC_FEATURE2_ARCH_3_00;
+      hwcap_initialized = 1;
+    }
+#endif
+#endif
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)
+  errno = 0;
+  auxv.a_val = getauxval (AT_HWCAP);
+  if (errno == 0)
+    {
+      stored_hwcap |= auxv.a_val;
+      hwcap_initialized = 1;
+    }
+
+  if (AT_HWCAP2 >= 0)
+    {
+      errno = 0;
+      auxv.a_val = getauxval (AT_HWCAP2);
+      if (errno == 0)
+	{
+	  stored_hwcap2 |= auxv.a_val;
+	  hwcap_initialized = 1;
+	}
+    }
+
+  if (hwcap_initialized && (stored_hwcap || stored_hwcap2))
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+#endif
+
+  f = fopen("/proc/self/auxv", "r");
+  if (!f)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return -1;
+    }
+
+  while (fread(&auxv, sizeof(auxv), 1, f) > 0)
+    {
+      if (auxv.a_type == AT_HWCAP)
+        {
+          stored_hwcap |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+
+      if (auxv.a_type == AT_HWCAP2)
+        {
+          stored_hwcap2 |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+    }
+
+  if (hwcap_initialized)
+      err = 0;
+
+  fclose(f);
+
+  *hwcap = stored_hwcap;
+  *hwcap2 = stored_hwcap2;
+  return err;
+}
+
+static unsigned int
+detect_ppc_at_hwcap(void)
+{
+  unsigned int hwcap;
+  unsigned int hwcap2;
+  unsigned int features = 0;
+  unsigned int i;
+
+  if (get_hwcap(&hwcap, &hwcap2) < 0)
+      return features;
+
+  for (i = 0; i < DIM(ppc_features); i++)
+    {
+      if (hwcap & ppc_features[i].hwcap_flag)
+        features |= ppc_features[i].hwf_flag;
+
+      if (hwcap2 & ppc_features[i].hwcap2_flag)
+        features |= ppc_features[i].hwf_flag;
+    }
+
+  return features;
+}
+
+#endif
+
+unsigned int
+_gcry_hwf_detect_ppc (void)
+{
+  unsigned int ret = 0;
+  unsigned int broken_hwfs = 0;
+
+#if defined (HAS_SYS_AT_HWCAP)
+  ret |= detect_ppc_at_hwcap ();
+#endif
+
+  ret &= ~broken_hwfs;
+
+  return ret;
+}
diff -up libgcrypt-1.8.5/src/Makefile.am.aes-perf libgcrypt-1.8.5/src/Makefile.am
--- libgcrypt-1.8.5/src/Makefile.am.aes-perf	2018-11-14 14:16:40.000000000 +0100
+++ libgcrypt-1.8.5/src/Makefile.am	2020-04-22 18:29:41.683861981 +0200
@@ -66,7 +66,7 @@ libgcrypt_la_SOURCES = \
 	hmac256.c hmac256.h context.c context.h \
 	ec-context.h
 
-EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c
+EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c
 gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@